Commit 73f73e4e authored by michael's avatar michael

use O(number of non zero coeffs) instead of O(number of coeffs) storage for...

use O(number of non-zero coeffs) instead of O(number of coeffs) storage for the coefficient collecting/reordering


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@4279 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 5230cba7
...@@ -134,8 +134,15 @@ static inline void debug_dc_pred(const char *format, ...) { } ...@@ -134,8 +134,15 @@ static inline void debug_dc_pred(const char *format, ...) { }
static inline void debug_idct(const char *format, ...) { } static inline void debug_idct(const char *format, ...) { }
#endif #endif
typedef struct Coeff {
struct Coeff *next;
DCTELEM coeff;
uint8_t index;
} Coeff;
//FIXME split things out into their own arrays
typedef struct Vp3Fragment { typedef struct Vp3Fragment {
DCTELEM *coeffs; Coeff *next_coeff;
/* address of first pixel taking into account which plane the fragment /* address of first pixel taking into account which plane the fragment
* lives on as well as the plane stride */ * lives on as well as the plane stride */
int first_pixel; int first_pixel;
...@@ -143,7 +150,6 @@ typedef struct Vp3Fragment { ...@@ -143,7 +150,6 @@ typedef struct Vp3Fragment {
uint16_t macroblock; uint16_t macroblock;
uint8_t coding_method; uint8_t coding_method;
uint8_t coeff_count; uint8_t coeff_count;
int8_t last_coeff;
int8_t motion_x; int8_t motion_x;
int8_t motion_y; int8_t motion_y;
} Vp3Fragment; } Vp3Fragment;
...@@ -246,7 +252,8 @@ typedef struct Vp3DecodeContext { ...@@ -246,7 +252,8 @@ typedef struct Vp3DecodeContext {
int fragment_height; int fragment_height;
Vp3Fragment *all_fragments; Vp3Fragment *all_fragments;
DCTELEM *coeffs; Coeff *coeffs;
Coeff *next_coeff;
int u_fragment_start; int u_fragment_start;
int v_fragment_start; int v_fragment_start;
...@@ -833,16 +840,17 @@ static void unpack_token(GetBitContext *gb, int token, int *zero_run, ...@@ -833,16 +840,17 @@ static void unpack_token(GetBitContext *gb, int token, int *zero_run,
static void init_frame(Vp3DecodeContext *s, GetBitContext *gb) static void init_frame(Vp3DecodeContext *s, GetBitContext *gb)
{ {
int i; int i;
static const DCTELEM zero_block[64];
/* zero out all of the fragment information */ /* zero out all of the fragment information */
s->coded_fragment_list_index = 0; s->coded_fragment_list_index = 0;
for (i = 0; i < s->fragment_count; i++) { for (i = 0; i < s->fragment_count; i++) {
s->all_fragments[i].coeffs = zero_block;
s->all_fragments[i].coeff_count = 0; s->all_fragments[i].coeff_count = 0;
s->all_fragments[i].last_coeff = -1;
s->all_fragments[i].motion_x = 0xbeef; s->all_fragments[i].motion_x = 0xbeef;
s->all_fragments[i].motion_y = 0xbeef; s->all_fragments[i].motion_y = 0xbeef;
s->all_fragments[i].next_coeff= NULL;
s->coeffs[i].index=
s->coeffs[i].coeff=0;
s->coeffs[i].next= NULL;
} }
} }
...@@ -1260,6 +1268,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) ...@@ -1260,6 +1268,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
/* figure out which fragments are coded; iterate through each /* figure out which fragments are coded; iterate through each
* superblock (all planes) */ * superblock (all planes) */
s->coded_fragment_list_index = 0; s->coded_fragment_list_index = 0;
s->next_coeff= s->coeffs + s->fragment_count;
s->first_coded_y_fragment = s->first_coded_c_fragment = 0; s->first_coded_y_fragment = s->first_coded_c_fragment = 0;
s->last_coded_y_fragment = s->last_coded_c_fragment = -1; s->last_coded_y_fragment = s->last_coded_c_fragment = -1;
first_c_fragment_seen = 0; first_c_fragment_seen = 0;
...@@ -1302,7 +1311,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) ...@@ -1302,7 +1311,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
* the next phase */ * the next phase */
s->all_fragments[current_fragment].coding_method = s->all_fragments[current_fragment].coding_method =
MODE_INTER_NO_MV; MODE_INTER_NO_MV;
s->all_fragments[current_fragment].coeffs= s->coeffs + 64*s->coded_fragment_list_index; s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
s->coded_fragment_list[s->coded_fragment_list_index] = s->coded_fragment_list[s->coded_fragment_list_index] =
current_fragment; current_fragment;
if ((current_fragment >= s->u_fragment_start) && if ((current_fragment >= s->u_fragment_start) &&
...@@ -1330,7 +1339,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) ...@@ -1330,7 +1339,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
* coding will be determined in next step */ * coding will be determined in next step */
s->all_fragments[current_fragment].coding_method = s->all_fragments[current_fragment].coding_method =
MODE_INTER_NO_MV; MODE_INTER_NO_MV;
s->all_fragments[current_fragment].coeffs= s->coeffs + 64*s->coded_fragment_list_index; s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
s->coded_fragment_list[s->coded_fragment_list_index] = s->coded_fragment_list[s->coded_fragment_list_index] =
current_fragment; current_fragment;
if ((current_fragment >= s->u_fragment_start) && if ((current_fragment >= s->u_fragment_start) &&
...@@ -1716,15 +1725,19 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, ...@@ -1716,15 +1725,19 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
if (!eob_run) { if (!eob_run) {
fragment->coeff_count += zero_run; fragment->coeff_count += zero_run;
if (fragment->coeff_count < 64) if (fragment->coeff_count < 64){
fragment->coeffs[perm[fragment->coeff_count++]] = coeff; fragment->next_coeff->coeff= coeff;
fragment->next_coeff->index= perm[fragment->coeff_count++]; //FIXME perm here already?
fragment->next_coeff->next= s->next_coeff;
s->next_coeff->next=NULL;
fragment->next_coeff= s->next_coeff++;
}
debug_vlc(" fragment %d coeff = %d\n", debug_vlc(" fragment %d coeff = %d\n",
s->coded_fragment_list[i], fragment->coeffs[coeff_index]); s->coded_fragment_list[i], fragment->next_coeff[coeff_index]);
} else { } else {
fragment->last_coeff = fragment->coeff_count; fragment->coeff_count |= 128;
fragment->coeff_count = 64;
debug_vlc(" fragment %d eob with %d coefficients\n", debug_vlc(" fragment %d eob with %d coefficients\n",
s->coded_fragment_list[i], fragment->last_coeff); s->coded_fragment_list[i], fragment->coeff_count&127);
eob_run--; eob_run--;
} }
} }
...@@ -1832,6 +1845,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb) ...@@ -1832,6 +1845,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
#define COMPATIBLE_FRAME(x) \ #define COMPATIBLE_FRAME(x) \
(compatible_frame[s->all_fragments[x].coding_method] == current_frame_type) (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type)
#define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY) #define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY)
#define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this
static inline int iabs (int x) { return ((x < 0) ? -x : x); } static inline int iabs (int x) { return ((x < 0) ? -x : x); }
static void reverse_dc_prediction(Vp3DecodeContext *s, static void reverse_dc_prediction(Vp3DecodeContext *s,
...@@ -1942,7 +1956,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ...@@ -1942,7 +1956,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
predictor_group = (x == 0) + ((y == 0) << 1) + predictor_group = (x == 0) + ((y == 0) << 1) +
((x + 1 == fragment_width) << 2); ((x + 1 == fragment_width) << 2);
debug_dc_pred(" frag %d: group %d, orig DC = %d, ", debug_dc_pred(" frag %d: group %d, orig DC = %d, ",
i, predictor_group, s->all_fragments[i].coeffs[0]); i, predictor_group, DC_COEFF(i));
switch (predictor_group) { switch (predictor_group) {
...@@ -1957,10 +1971,10 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ...@@ -1957,10 +1971,10 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
l = i - 1; l = i - 1;
/* fetch the DC values for the predicting fragments */ /* fetch the DC values for the predicting fragments */
vul = s->all_fragments[ul].coeffs[0]; vul = DC_COEFF(ul);
vu = s->all_fragments[u].coeffs[0]; vu = DC_COEFF(u);
vur = s->all_fragments[ur].coeffs[0]; vur = DC_COEFF(ur);
vl = s->all_fragments[l].coeffs[0]; vl = DC_COEFF(l);
/* figure out which fragments are valid */ /* figure out which fragments are valid */
ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul); ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul);
...@@ -1982,8 +1996,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ...@@ -1982,8 +1996,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
ur = i - fragment_width + 1; ur = i - fragment_width + 1;
/* fetch the DC values for the predicting fragments */ /* fetch the DC values for the predicting fragments */
vu = s->all_fragments[u].coeffs[0]; vu = DC_COEFF(u);
vur = s->all_fragments[ur].coeffs[0]; vur = DC_COEFF(ur);
/* figure out which fragments are valid */ /* figure out which fragments are valid */
fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur); fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur);
...@@ -2003,7 +2017,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ...@@ -2003,7 +2017,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
l = i - 1; l = i - 1;
/* fetch the DC values for the predicting fragments */ /* fetch the DC values for the predicting fragments */
vl = s->all_fragments[l].coeffs[0]; vl = DC_COEFF(l);
/* figure out which fragments are valid */ /* figure out which fragments are valid */
fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l); fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l);
...@@ -2032,9 +2046,9 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ...@@ -2032,9 +2046,9 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
l = i - 1; l = i - 1;
/* fetch the DC values for the predicting fragments */ /* fetch the DC values for the predicting fragments */
vul = s->all_fragments[ul].coeffs[0]; vul = DC_COEFF(ul);
vu = s->all_fragments[u].coeffs[0]; vu = DC_COEFF(u);
vl = s->all_fragments[l].coeffs[0]; vl = DC_COEFF(l);
/* figure out which fragments are valid */ /* figure out which fragments are valid */
ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul); ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul);
...@@ -2054,9 +2068,9 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ...@@ -2054,9 +2068,9 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
/* if there were no fragments to predict from, use last /* if there were no fragments to predict from, use last
* DC saved */ * DC saved */
s->all_fragments[i].coeffs[0] += last_dc[current_frame_type]; predicted_dc = last_dc[current_frame_type];
debug_dc_pred("from last DC (%d) = %d\n", debug_dc_pred("from last DC (%d) = %d\n",
current_frame_type, s->all_fragments[i].coeffs[0]); current_frame_type, DC_COEFF(i));
} else { } else {
...@@ -2086,16 +2100,26 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, ...@@ -2086,16 +2100,26 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
predicted_dc = vul; predicted_dc = vul;
} }
/* at long last, apply the predictor */
s->all_fragments[i].coeffs[0] += predicted_dc;
debug_dc_pred("from pred DC = %d\n", debug_dc_pred("from pred DC = %d\n",
s->all_fragments[i].coeffs[0]); DC_COEFF(i));
} }
/* at long last, apply the predictor */
if(s->coeffs[i].index){
*s->next_coeff= s->coeffs[i];
s->coeffs[i].index=0;
s->coeffs[i].coeff=0;
s->coeffs[i].next= s->next_coeff++;
}
s->coeffs[i].coeff += predicted_dc;
/* save the DC */ /* save the DC */
last_dc[current_frame_type] = s->all_fragments[i].coeffs[0]; last_dc[current_frame_type] = DC_COEFF(i);
if(s->all_fragments[i].coeffs[0] && s->all_fragments[i].last_coeff<0) if(DC_COEFF(i) && !(s->all_fragments[i].coeff_count&127)){
s->all_fragments[i].last_coeff= 0; s->all_fragments[i].coeff_count= 129;
// s->all_fragments[i].next_coeff= s->next_coeff;
s->coeffs[i].next= s->next_coeff;
(s->next_coeff++)->next=NULL;
}
} }
} }
} }
...@@ -2115,7 +2139,7 @@ static void render_fragments(Vp3DecodeContext *s, ...@@ -2115,7 +2139,7 @@ static void render_fragments(Vp3DecodeContext *s,
int m, n; int m, n;
int i = first_fragment; int i = first_fragment;
int16_t *dequantizer; int16_t *dequantizer;
DCTELEM __align16 output_samples[64]; DCTELEM __align16 block[64];
unsigned char *output_plane; unsigned char *output_plane;
unsigned char *last_plane; unsigned char *last_plane;
unsigned char *golden_plane; unsigned char *golden_plane;
...@@ -2244,15 +2268,21 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, ...@@ -2244,15 +2268,21 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
/* dequantize the DCT coefficients */ /* dequantize the DCT coefficients */
debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n",
i, s->all_fragments[i].coding_method, i, s->all_fragments[i].coding_method,
s->all_fragments[i].coeffs[0], dequantizer[0]); DC_COEFF(i), dequantizer[0]);
if(s->avctx->idct_algo==FF_IDCT_VP3){ if(s->avctx->idct_algo==FF_IDCT_VP3){
for (j = 0; j < 64; j++) { Coeff *coeff= s->coeffs + i;
s->all_fragments[i].coeffs[j] *= dequantizer[j]; memset(block, 0, sizeof(block));
while(coeff->next){
block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
coeff= coeff->next;
} }
}else{ }else{
for (j = 0; j < 64; j++) { Coeff *coeff= s->coeffs + i;
s->all_fragments[i].coeffs[j]= (dequantizer[j] * s->all_fragments[i].coeffs[j] + 2) >> 2; memset(block, 0, sizeof(block));
while(coeff->next){
block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
coeff= coeff->next;
} }
} }
...@@ -2260,18 +2290,17 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, ...@@ -2260,18 +2290,17 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
if (s->all_fragments[i].coding_method == MODE_INTRA) { if (s->all_fragments[i].coding_method == MODE_INTRA) {
if(s->avctx->idct_algo!=FF_IDCT_VP3) if(s->avctx->idct_algo!=FF_IDCT_VP3)
s->all_fragments[i].coeffs[0] += 128<<3; block[0] += 128<<3;
s->dsp.idct_put( s->dsp.idct_put(
output_plane + s->all_fragments[i].first_pixel, output_plane + s->all_fragments[i].first_pixel,
stride, stride,
s->all_fragments[i].coeffs); block);
} else { } else {
s->dsp.idct_add( s->dsp.idct_add(
output_plane + s->all_fragments[i].first_pixel, output_plane + s->all_fragments[i].first_pixel,
stride, stride,
s->all_fragments[i].coeffs); block);
} }
memset(s->all_fragments[i].coeffs, 0, 64*sizeof(DCTELEM));
debug_idct("block after idct_%s():\n", debug_idct("block after idct_%s():\n",
(s->all_fragments[i].coding_method == MODE_INTRA)? (s->all_fragments[i].coding_method == MODE_INTRA)?
...@@ -2611,7 +2640,7 @@ static int vp3_decode_init(AVCodecContext *avctx) ...@@ -2611,7 +2640,7 @@ static int vp3_decode_init(AVCodecContext *avctx)
s->v_fragment_start); s->v_fragment_start);
s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
s->coeffs = av_malloc(s->fragment_count * sizeof(DCTELEM) * 64); s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int)); s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
s->pixel_addresses_inited = 0; s->pixel_addresses_inited = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment