Commit 1665af73 authored by diego

cosmetics: Reformat PPC code in libavcodec according to style guidelines.

This includes indentation changes, comment reformatting, consistent brace
placement and some prettyprinting.


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@14316 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 18d45891
@@ -96,10 +96,8 @@ void powerpc_display_perf_report(void)
{
int i, j;
av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
for(i = 0 ; i < powerpc_perf_total ; i++)
{
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
{
for(i = 0 ; i < powerpc_perf_total ; i++) {
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
av_log(NULL, AV_LOG_INFO,
" Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
@@ -117,28 +115,23 @@ void powerpc_display_perf_report(void)
/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
clear_blocks_dcbz32_ppc will not work properly
on PowerPC processors with a cache line size
not equal to 32 bytes.
Fortunately all processor used by Apple up to
at least the 7450 (aka second generation G4)
use 32 bytes cache line.
This is due to the use of the 'dcbz' instruction.
It simply clear to zero a single cache line,
so you need to know the cache line size to use it !
It's absurd, but it's fast...
clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
cache line size not equal to 32 bytes.
Fortunately all processor used by Apple up to at least the 7450 (aka second
generation G4) use 32 bytes cache line.
This is due to the use of the 'dcbz' instruction. It simply clear to zero a
single cache line, so you need to know the cache line size to use it !
It's absurd, but it's fast...
update 24/06/2003 : Apple released yesterday the G5,
with a PPC970. cache line size : 128 bytes. Oups.
The semantic of dcbz was changed, it always clear
32 bytes. so the function below will work, but will
be slow. So I fixed check_dcbz_effect to use dcbzl,
which is defined to clear a cache line (as dcbz before).
So we still can distinguish, and use dcbz (32 bytes)
or dcbzl (one cache line) as required.
update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
size: 128 bytes. Oups.
The semantic of dcbz was changed, it always clear 32 bytes. so the function
below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
which is defined to clear a cache line (as dcbz before). So we still can
distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
see <http://developer.apple.com/technotes/tn/tn2087.html>
and <http://developer.apple.com/technotes/tn/tn2086.html>
see <http://developer.apple.com/technotes/tn/tn2087.html>
and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
{
@@ -216,8 +209,7 @@ long check_dcbzl_effect(void)
register long i = 0;
long count = 0;
if (!fakedata)
{
if (!fakedata) {
return 0L;
}
@@ -229,8 +221,7 @@ long check_dcbzl_effect(void)
in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
for (i = 0; i < 1024 ; i ++)
{
for (i = 0; i < 1024 ; i ++) {
if (fakedata[i] == (char)0)
count++;
}
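
The long comment above explains why this probe exists: on the PPC970, plain dcbz always clears 32 bytes, while dcbzl clears a full cache line (128 bytes on that CPU), so counting how many bytes come back zeroed reveals the effective clear size. A rough standalone illustration of the same idea, in the spirit of check_dcbzl_effect (a hedged sketch only, not the FFmpeg function; it assumes a GCC-style PowerPC compiler and an assembler that accepts the dcbzl mnemonic, and the probe_dcbzl_clear_size name is invented for the example):

/* Hypothetical probe, modelled on check_dcbzl_effect: fill a buffer with
 * non-zero bytes, dcbzl the middle of it, then count how many bytes were
 * zeroed. That count is the number of bytes dcbzl clears. */
#include <string.h>

static long probe_dcbzl_clear_size(void)
{
    char buf[1024];
    register char *middle = buf + 512;  /* the cleared line stays inside buf   */
    register long zero = 0;             /* offset operand, kept in a register  */
    long i, count = 0;

    memset(buf, 0xFF, sizeof(buf));     /* non-zero fill, like fakedata above  */

    /* the "b" constraint keeps the base address out of r0, as in the original */
    __asm__ volatile ("dcbzl %0, %1" : : "b" (middle), "r" (zero) : "memory");

    for (i = 0; i < (long)sizeof(buf); i++)
        if (buf[i] == 0)
            count++;

    return count;
}
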
@@ -286,17 +277,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
#ifdef CONFIG_ENCODERS
if (avctx->dct_algo == FF_DCT_AUTO ||
avctx->dct_algo == FF_DCT_ALTIVEC)
{
avctx->dct_algo == FF_DCT_ALTIVEC) {
c->fdct = fdct_altivec;
}
#endif //CONFIG_ENCODERS
if (avctx->lowres==0)
{
if (avctx->lowres==0) {
if ((avctx->idct_algo == FF_IDCT_AUTO) ||
(avctx->idct_algo == FF_IDCT_ALTIVEC))
{
(avctx->idct_algo == FF_IDCT_ALTIVEC)) {
c->idct_put = idct_put_altivec;
c->idct_add = idct_add_altivec;
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -306,10 +294,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
#ifdef CONFIG_POWERPC_PERF
{
int i, j;
for (i = 0 ; i < powerpc_perf_total ; i++)
{
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
{
for (i = 0 ; i < powerpc_perf_total ; i++) {
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
......
@@ -125,14 +125,11 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
POWERPC_GET_PMC4(pmc_stop[3]); \
POWERPC_GET_PMC5(pmc_stop[4]); \
POWERPC_GET_PMC6(pmc_stop[5]); \
if (cond) \
{ \
if (cond) { \
for(pmc_loop_index = 0; \
pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
pmc_loop_index++) \
{ \
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
{ \
pmc_loop_index++) { \
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) { \
POWERP_PMC_DATATYPE diff = \
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
......
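
For reference, the macro above together with the initialisation in dsputil_init_ppc keeps four running statistics per function and per PMC: the minimum, maximum, sum and count of measured counter deltas, from which powerpc_display_perf_report prints min, max and avg. The same bookkeeping in plain C looks roughly like this (an illustrative sketch, not the actual POWERPC_PERF_STOP_COUNT macro; the struct and helper names are invented for the example):

#include <stdint.h>

/* Hypothetical stand-in for one perfdata[pmc][func][] slot. */
typedef struct {
    uint64_t min, max, sum, num;
} perf_stats;

static void perf_stats_init(perf_stats *s)
{
    s->min = UINT64_MAX;  /* mirrors the 0xFFFFFFFFFFFFFFFFULL init above */
    s->max = 0;
    s->sum = 0;
    s->num = 0;
}

static void perf_stats_update(perf_stats *s, uint64_t start, uint64_t stop)
{
    if (stop >= start) {            /* ignore counter wrap, as the macro does */
        uint64_t diff = stop - start;
        if (diff < s->min) s->min = diff;
        if (diff > s->max) s->max = diff;
        s->sum += diff;
        s->num++;
    }
}

/* avg as printed by powerpc_display_perf_report: sum / num (when num > 0). */
static double perf_stats_avg(const perf_stats *s)
{
    return s->num ? (double)s->sum / (double)s->num : 0.0;
}
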
@@ -85,12 +85,9 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
c1 = vcii(p,p,n,n);
if (s->inverse)
{
if (s->inverse) {
c2 = vcii(p,p,n,p);
}
else
{
} else {
c2 = vcii(p,p,p,n);
}
......
@@ -74,19 +74,17 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
src_1 = vec_ld(16, src);
srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
if (src_really_odd != 0x0000000F)
{ // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
if (src_really_odd != 0x0000000F) {
// if src & 0xF == 0xF, then (src+1) is properly aligned
// on the second vector.
srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
}
else
{
} else {
srcvB = src_1;
}
srcvA = vec_mergeh(vczero, srcvA);
srcvB = vec_mergeh(vczero, srcvB);
for(i=0; i<h; i++)
{
for(i=0; i<h; i++) {
dst_odd = (unsigned long)dst & 0x0000000F;
src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
@@ -100,12 +98,11 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
src_1 = vec_ld(stride + 16, src);
srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
if (src_really_odd != 0x0000000F)
{ // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
if (src_really_odd != 0x0000000F) {
// if src & 0xF == 0xF, then (src+1) is properly aligned
// on the second vector.
srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
}
else
{
} else {
srcvD = src_1;
}
@@ -128,12 +125,9 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
if (dst_odd)
{
if (dst_odd) {
dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
}
else
{
} else {
dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
}
......
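
A recurring pattern in the AltiVec code above is the misaligned load: vec_ld can only fetch from 16-byte-aligned addresses, so two aligned loads are combined with vec_perm using the shuffle pattern returned by vec_lvsl, which is what the src_0/src_1/srcvA lines do. A minimal sketch of the idiom (assuming an AltiVec-capable compiler; the helper name is made up for this example):

#include <altivec.h>

/* Hypothetical helper: read 16 bytes starting at an arbitrary address 'p'.
 * vec_ld ignores the low 4 address bits, so we load the two aligned vectors
 * that straddle 'p' and let vec_perm pick out the 16 bytes we actually want. */
static inline vector unsigned char load_unaligned16(const unsigned char *p)
{
    vector unsigned char lo   = vec_ld(0,  p);   /* aligned block containing p */
    vector unsigned char hi   = vec_ld(16, p);   /* following aligned block    */
    vector unsigned char perm = vec_lvsl(0, p);  /* {off, off+1, ..., off+15}  */
    return vec_perm(lo, hi, perm);
}

The gmc1 code above uses the same trick but special-cases src & 0xF == 0xF, where src + 1 is itself 16-byte aligned, and simply reuses src_1 instead of permuting.
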
@@ -392,8 +392,8 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
*/
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
/****************************************************************************
......
@@ -344,7 +344,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
src += srcStride;
dst += dstStride;
}
POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
}
/* this code assume stride % 16 == 0 */
@@ -365,23 +365,23 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
const vec_u8_t srcM2a = vec_ld(0, srcbis);
const vec_u8_t srcM2b = vec_ld(16, srcbis);
const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
// srcbis += srcStride;
//srcbis += srcStride;
const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
const vec_u8_t srcM1b = vec_ld(16, srcbis);
const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
// srcbis += srcStride;
//srcbis += srcStride;
const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
const vec_u8_t srcP0b = vec_ld(16, srcbis);
const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
// srcbis += srcStride;
//srcbis += srcStride;
const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
const vec_u8_t srcP1b = vec_ld(16, srcbis);
const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
// srcbis += srcStride;
//srcbis += srcStride;
const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
const vec_u8_t srcP2b = vec_ld(16, srcbis);
const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
// srcbis += srcStride;
//srcbis += srcStride;
vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
@@ -409,7 +409,7 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
srcP3 = vec_perm(srcP3a, srcP3b, perm);
srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
// srcbis += srcStride;
//srcbis += srcStride;
sum1A = vec_adds(srcP0ssA, srcP1ssA);
sum1B = vec_adds(srcP0ssB, srcP1ssB);
......
@@ -22,7 +22,6 @@
* NOTE: This code is based on GPL code from the libmpeg2 project. The
* author, Michel Lespinasses, has given explicit permission to release
* under LGPL as part of ffmpeg.
*
*/
/*
......
@@ -46,8 +46,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
vector signed short zeros, sumhv, sumlv;
s = src;
for(i=0;i<4;i++)
{
for(i=0;i<4;i++) {
/*
The vec_madds later on does an implicit >>15 on the result.
Since FILTER_BITS is 8, and we have 15 bits of magnitude in
@@ -86,13 +85,11 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
/* Do our altivec resampling on 16 pixels at once. */
while(dst_width>=16) {
/*
Read 16 (potentially unaligned) bytes from each of
/* Read 16 (potentially unaligned) bytes from each of
4 lines into 4 vectors, and split them into shorts.
Interleave the multipy/accumulate for the resample
filter with the loads to hide the 3 cycle latency
the vec_madds have.
*/
the vec_madds have. */
tv = (vector unsigned char *) &s[0 * wrap];
tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
@@ -121,10 +118,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
/*
Pack the results into our destination vector,
and do an aligned write of that back to memory.
*/
/* Pack the results into our destination vector,
and do an aligned write of that back to memory. */
dstv = vec_packsu(sumhv, sumlv) ;
vec_st(dstv, 0, (vector unsigned char *) dst);
@@ -133,10 +128,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
dst_width-=16;
}
/*
If there are any leftover pixels, resample them
with the slow scalar method.
*/
/* If there are any leftover pixels, resample them
with the slow scalar method. */
while(dst_width>0) {
sum = s[0 * wrap] * filter[0] +
s[1 * wrap] * filter[1] +
......
@@ -25,11 +25,11 @@
#if defined(ARCH_POWERPC_405)
/* signed 16x16 -> 32 multiply add accumulate */
# define MAC16(rt, ra, rb) \
#define MAC16(rt, ra, rb) \
asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
/* signed 16x16 -> 32 multiply */
# define MUL16(ra, rb) \
#define MUL16(ra, rb) \
({ int __rt; \
asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
__rt; })
......
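
The two macros above wrap the PPC 405 instructions maclhw (multiply-accumulate) and mullhw (multiply), both of which take the signed low halfwords of their operands and produce a 32-bit result, as the comments say. In portable C the semantics look roughly like this (a sketch to show what the asm computes, not a drop-in replacement; the lowercase helper names are only for illustration):

#include <stdint.h>

/* Signed 16x16 -> 32 multiply, i.e. what MUL16 / mullhw computes
 * from the low halfwords of ra and rb. */
static inline int32_t mul16_c(int32_t ra, int32_t rb)
{
    return (int32_t)(int16_t)ra * (int32_t)(int16_t)rb;
}

/* Signed 16x16 -> 32 multiply-accumulate, i.e. what MAC16 / maclhw computes:
 * rt += (low halfword of ra) * (low halfword of rb), both signed. */
static inline void mac16_c(int32_t *rt, int32_t ra, int32_t rb)
{
    *rt += (int32_t)(int16_t)ra * (int32_t)(int16_t)rb;
}
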
@@ -137,10 +137,8 @@ int dct_quantize_altivec(MpegEncContext* s,
int whichPass, whichHalf;
for(whichPass = 1; whichPass<=2; whichPass++)
{
for(whichHalf = 1; whichHalf<=2; whichHalf++)
{
for(whichPass = 1; whichPass<=2; whichPass++) {
for(whichHalf = 1; whichHalf<=2; whichHalf++) {
vector float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
vector float tmp10, tmp11, tmp12, tmp13;
vector float z1, z2, z3, z4, z5;
@@ -235,8 +233,7 @@ int dct_quantize_altivec(MpegEncContext* s,
SWAP(row7, alt7);
}
if (whichPass == 1)
{
if (whichPass == 1) {
// transpose the data for the second pass
// First, block transpose the upper right with lower left.
@@ -261,8 +258,7 @@ int dct_quantize_altivec(MpegEncContext* s,
const vector signed int* qmat;
vector float bias, negBias;
if (s->mb_intra)
{
if (s->mb_intra) {
vector signed int baseVector;
// We must cache element 0 in the intra case
@@ -272,9 +268,7 @@ int dct_quantize_altivec(MpegEncContext* s,
qmat = (vector signed int*)s->q_intra_matrix[qscale];
biasAddr = &(s->intra_quant_bias);
}
else
{
} else {
qmat = (vector signed int*)s->q_inter_matrix[qscale];
biasAddr = &(s->inter_quant_bias);
}
@@ -439,8 +433,7 @@ int dct_quantize_altivec(MpegEncContext* s,
// and handle it using the vector unit if we can. This is the permute used
// by the altivec idct, so it is common when using the altivec dct.
if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
{
if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) {
TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
}
@@ -456,10 +449,8 @@ int dct_quantize_altivec(MpegEncContext* s,
}
// special handling of block[0]
if (s->mb_intra)
{
if (!s->h263_aic)
{
if (s->mb_intra) {
if (!s->h263_aic) {
if (n < 4)
oldBaseValue /= s->y_dc_scale;
else
@@ -474,8 +465,7 @@ int dct_quantize_altivec(MpegEncContext* s,
// need to permute the "no" permutation case.
if ((lastNonZero > 0) &&
(s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
(s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
{
(s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) {
ff_block_permute(data, s->dsp.idct_permutation,
s->intra_scantable.scantable, lastNonZero);
}
@@ -483,10 +473,8 @@ int dct_quantize_altivec(MpegEncContext* s,
return lastNonZero;
}
/*
AltiVec version of dct_unquantize_h263
this code assumes `block' is 16 bytes-aligned
*/
/* AltiVec version of dct_unquantize_h263
this code assumes `block' is 16 bytes-aligned */
void dct_unquantize_h263_altivec(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
@@ -559,8 +547,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
// vectorize all the 16 bytes-aligned blocks
// of 8 elements
for(; (j + 7) <= nCoeffs ; j+=8)
{
for(; (j + 7) <= nCoeffs ; j+=8) {
blockv = vec_ld(j << 1, block);
blockv_neg = vec_cmplt(blockv, vczero);
blockv_null = vec_cmpeq(blockv, vczero);
@@ -589,8 +576,8 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
}
}
if (i == 1)
{ // cheat. this avoid special-casing the first iteration
if (i == 1) {
// cheat. this avoid special-casing the first iteration
block[0] = backup_0;
}
}
@@ -605,11 +592,9 @@ void MPV_common_init_altivec(MpegEncContext *s)
{
if ((mm_flags & MM_ALTIVEC) == 0) return;
if (s->avctx->lowres==0)
{
if (s->avctx->lowres==0) {
if ((s->avctx->idct_algo == FF_IDCT_AUTO) ||
(s->avctx->idct_algo == FF_IDCT_ALTIVEC))
{
(s->avctx->idct_algo == FF_IDCT_ALTIVEC)) {
s->dsp.idct_put = idct_put_altivec;
s->dsp.idct_add = idct_add_altivec;
s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -618,15 +603,13 @@ void MPV_common_init_altivec(MpegEncContext *s)
// Test to make sure that the dct required alignments are met.
if ((((long)(s->q_intra_matrix) & 0x0f) != 0) ||
(((long)(s->q_inter_matrix) & 0x0f) != 0))
{
(((long)(s->q_inter_matrix) & 0x0f) != 0)) {
av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned "
"to use AltiVec DCT. Reverting to non-AltiVec version.\n");
return;
}
if (((long)(s->intra_scantable.inverse) & 0x0f) != 0)
{
if (((long)(s->intra_scantable.inverse) & 0x0f) != 0) {
av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned "
"to use AltiVec DCT. Reverting to non-AltiVec version.\n");
return;
@@ -634,8 +617,7 @@ void MPV_common_init_altivec(MpegEncContext *s)
if ((s->avctx->dct_algo == FF_DCT_AUTO) ||
(s->avctx->dct_algo == FF_DCT_ALTIVEC))
{
(s->avctx->dct_algo == FF_DCT_ALTIVEC)) {
#if 0 /* seems to cause trouble under some circumstances */
s->dct_quantize = dct_quantize_altivec;
#endif
......
@@ -379,8 +379,7 @@ void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
v4=(vector signed int *)b4;
v5=(vector signed int *)b5;
for (i=0; i< w4;i++)
{
for (i=0; i< w4;i++) {
#if 0
b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
......