Commit 822d7a14 authored by michael's avatar michael

shift CABACContext.range right, this reduces the number of shifts needed in...

Shift CABACContext.range right; this reduces the number of shifts needed in get_cabac() and is slightly faster on P3 (and should be much faster on P4, as the P4, except for the more recent variants, lacks an integer shifter, so shifts have ~10 times longer latency than simple operations like adds).


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@6702 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent ac504942
...@@ -51,7 +51,7 @@ static const uint8_t lps_range[64][4]= { ...@@ -51,7 +51,7 @@ static const uint8_t lps_range[64][4]= {
}; };
uint8_t ff_h264_mlps_state[4*64]; uint8_t ff_h264_mlps_state[4*64];
uint8_t ff_h264_lps_range[2*65][4]; uint8_t ff_h264_lps_range[4][2*64];
uint8_t ff_h264_lps_state[2*64]; uint8_t ff_h264_lps_state[2*64];
uint8_t ff_h264_mps_state[2*64]; uint8_t ff_h264_mps_state[2*64];
...@@ -76,8 +76,8 @@ static const uint8_t lps_state[64]= { ...@@ -76,8 +76,8 @@ static const uint8_t lps_state[64]= {
33,33,34,34,35,35,35,36, 33,33,34,34,35,35,35,36,
36,36,37,37,37,38,38,63, 36,36,37,37,37,38,38,63,
}; };
#if 0
const uint8_t ff_h264_norm_shift[128]= { const uint8_t ff_h264_norm_shift_old[128]= {
7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3, 7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
...@@ -87,6 +87,29 @@ const uint8_t ff_h264_norm_shift[128]= { ...@@ -87,6 +87,29 @@ const uint8_t ff_h264_norm_shift[128]= {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
}; };
#endif
/**
 * Normalization shift lookup table, indexed by a 9-bit value (0..511).
 *
 * Entry i holds 9 minus the bit length of i, i.e. the left shift that
 * moves the highest set bit of i up to bit 8; entry 0 is 9.
 * NOTE(review): presumably indexed with the right-shifted CABAC range
 * during renormalization in get_cabac() -- confirm against the decoder.
 */
const uint8_t ff_h264_norm_shift[512] = {
    /*   0.. 15 */
    9,8,7,7,6,6,6,6, 5,5,5,5,5,5,5,5,
    /*  16.. 31 */
    4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,
    /*  32.. 63 */
    3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,
    /*  64..127 */
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    /* 128..255 */
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 256..511 */
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
};
/** /**
* *
...@@ -121,7 +144,7 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){ ...@@ -121,7 +144,7 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
c->low = (*c->bytestream++)<<10; c->low = (*c->bytestream++)<<10;
#endif #endif
c->low+= ((*c->bytestream++)<<2) + 2; c->low+= ((*c->bytestream++)<<2) + 2;
c->range= 0x1FE<<(CABAC_BITS + 1); c->range= 0x1FE;
} }
void ff_init_cabac_states(CABACContext *c){ void ff_init_cabac_states(CABACContext *c){
...@@ -129,8 +152,8 @@ void ff_init_cabac_states(CABACContext *c){ ...@@ -129,8 +152,8 @@ void ff_init_cabac_states(CABACContext *c){
for(i=0; i<64; i++){ for(i=0; i<64; i++){
for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
ff_h264_lps_range[2*i+0][j+4]= ff_h264_lps_range[j][2*i+0]=
ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j]; ff_h264_lps_range[j][2*i+1]= lps_range[i][j];
} }
ff_h264_mlps_state[128+2*i+0]= ff_h264_mlps_state[128+2*i+0]=
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment