Commit c8b11826 authored by michael

Make the state transition tables global, as they are constant and the code is slightly faster that way.


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@6655 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent ee4dc127
...@@ -51,8 +51,10 @@ static const uint8_t lps_range[64][4]= { ...@@ -51,8 +51,10 @@ static const uint8_t lps_range[64][4]= {
}; };
uint8_t ff_h264_lps_range[2*65][4]; uint8_t ff_h264_lps_range[2*65][4];
uint8_t ff_h264_lps_state[2*64];
uint8_t ff_h264_mps_state[2*64];
const uint8_t ff_h264_mps_state[64]= { static const uint8_t mps_state[64]= {
1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8,
9,10,11,12,13,14,15,16, 9,10,11,12,13,14,15,16,
17,18,19,20,21,22,23,24, 17,18,19,20,21,22,23,24,
...@@ -63,7 +65,7 @@ const uint8_t ff_h264_mps_state[64]= { ...@@ -63,7 +65,7 @@ const uint8_t ff_h264_mps_state[64]= {
57,58,59,60,61,62,62,63, 57,58,59,60,61,62,62,63,
}; };
const uint8_t ff_h264_lps_state[64]= { static const uint8_t lps_state[64]= {
0, 0, 1, 2, 2, 4, 4, 5, 0, 0, 1, 2, 2, 4, 4, 5,
6, 7, 8, 9, 9,11,11,12, 6, 7, 8, 9, 9,11,11,12,
13,13,15,15,16,16,18,18, 13,13,15,15,16,16,18,18,
...@@ -121,32 +123,31 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){ ...@@ -121,32 +123,31 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
c->range= 0x1FE<<(CABAC_BITS + 1); c->range= 0x1FE<<(CABAC_BITS + 1);
} }
void ff_init_cabac_states(CABACContext *c, void ff_init_cabac_states(CABACContext *c){
uint8_t const *mps_state, uint8_t const *lps_state, int state_count){
int i, j; int i, j;
for(i=0; i<state_count; i++){ for(i=0; i<64; i++){
for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
ff_h264_lps_range[2*i+0][j+4]= ff_h264_lps_range[2*i+0][j+4]=
ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j]; ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j];
} }
c->mps_state[2*i+0]= 2*mps_state[i]+0; ff_h264_mps_state[2*i+0]= 2*mps_state[i]+0;
c->mps_state[2*i+1]= 2*mps_state[i]+1; ff_h264_mps_state[2*i+1]= 2*mps_state[i]+1;
if( i ){ if( i ){
#ifdef BRANCHLESS_CABAC_DECODER #ifdef BRANCHLESS_CABAC_DECODER
c->mps_state[-2*i-1]= 2*lps_state[i]+0; //FIXME yes this is not valid C but iam lazy, cleanup welcome ff_h264_mps_state[-2*i-1]= 2*lps_state[i]+0; //FIXME yes this is not valid C but iam lazy, cleanup welcome
c->mps_state[-2*i-2]= 2*lps_state[i]+1; ff_h264_mps_state[-2*i-2]= 2*lps_state[i]+1;
}else{ }else{
c->mps_state[-2*i-1]= 1; ff_h264_mps_state[-2*i-1]= 1;
c->mps_state[-2*i-2]= 0; ff_h264_mps_state[-2*i-2]= 0;
#else #else
c->lps_state[2*i+0]= 2*lps_state[i]+0; ff_h264_lps_state[2*i+0]= 2*lps_state[i]+0;
c->lps_state[2*i+1]= 2*lps_state[i]+1; ff_h264_lps_state[2*i+1]= 2*lps_state[i]+1;
}else{ }else{
c->lps_state[2*i+0]= 1; ff_h264_lps_state[2*i+0]= 1;
c->lps_state[2*i+1]= 0; ff_h264_lps_state[2*i+1]= 0;
#endif #endif
} }
} }
......
...@@ -41,8 +41,6 @@ typedef struct CABACContext{ ...@@ -41,8 +41,6 @@ typedef struct CABACContext{
#ifdef STRICT_LIMITS #ifdef STRICT_LIMITS
int symCount; int symCount;
#endif #endif
uint8_t lps_state[2*64]; ///< transIdxLPS
uint8_t mps_state[2*64]; ///< transIdxMPS
const uint8_t *bytestream_start; const uint8_t *bytestream_start;
const uint8_t *bytestream; const uint8_t *bytestream;
const uint8_t *bytestream_end; const uint8_t *bytestream_end;
...@@ -50,15 +48,14 @@ typedef struct CABACContext{ ...@@ -50,15 +48,14 @@ typedef struct CABACContext{
}CABACContext; }CABACContext;
extern uint8_t ff_h264_lps_range[2*65][4]; ///< rangeTabLPS extern uint8_t ff_h264_lps_range[2*65][4]; ///< rangeTabLPS
extern const uint8_t ff_h264_mps_state[64]; extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS
extern const uint8_t ff_h264_lps_state[64]; extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS
extern const uint8_t ff_h264_norm_shift[128]; extern const uint8_t ff_h264_norm_shift[128];
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size); void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size); void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
void ff_init_cabac_states(CABACContext *c, void ff_init_cabac_states(CABACContext *c);
uint8_t const *mps_state, uint8_t const *lps_state, int state_count);
static inline void put_cabac_bit(CABACContext *c, int b){ static inline void put_cabac_bit(CABACContext *c, int b){
...@@ -91,11 +88,11 @@ static void put_cabac(CABACContext *c, uint8_t * const state, int bit){ ...@@ -91,11 +88,11 @@ static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
if(bit == ((*state)&1)){ if(bit == ((*state)&1)){
c->range -= RangeLPS; c->range -= RangeLPS;
*state= c->mps_state[*state]; *state= ff_h264_mps_state[*state];
}else{ }else{
c->low += c->range - RangeLPS; c->low += c->range - RangeLPS;
c->range = RangeLPS; c->range = RangeLPS;
*state= c->lps_state[*state]; *state= ff_h264_lps_state[*state];
} }
renorm_cabac_encoder(c); renorm_cabac_encoder(c);
...@@ -369,11 +366,9 @@ static int get_cabac(CABACContext *c, uint8_t * const state){ ...@@ -369,11 +366,9 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
#define LOW "0" #define LOW "0"
#define RANGE "4" #define RANGE "4"
#define LPS_STATE "12" #define BYTESTART "12"
#define MPS_STATE "12+2*64" #define BYTE "16"
#define BYTESTART "12+4*64" #define BYTEEND "20"
#define BYTE "16+4*64"
#define BYTEEND "20+4*64"
#ifndef BRANCHLESS_CABAC_DECODER #ifndef BRANCHLESS_CABAC_DECODER
asm volatile( asm volatile(
"movzbl (%1), %%eax \n\t" "movzbl (%1), %%eax \n\t"
...@@ -391,7 +386,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){ ...@@ -391,7 +386,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
"setb %%cl \n\t" "setb %%cl \n\t"
"shl %%cl, %%edx \n\t" "shl %%cl, %%edx \n\t"
"shl %%cl, %%ebx \n\t" "shl %%cl, %%ebx \n\t"
"movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t" "movzbl "MANGLE(ff_h264_mps_state)"(%%eax), %%ecx \n\t"
"movb %%cl, (%1) \n\t" "movb %%cl, (%1) \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS //eax:state ebx:low, edx:range, esi:RangeLPS
"test %%bx, %%bx \n\t" "test %%bx, %%bx \n\t"
...@@ -413,7 +408,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){ ...@@ -413,7 +408,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
"movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
"shll %%cl, %%ebx \n\t" "shll %%cl, %%ebx \n\t"
"shll %%cl, %%edx \n\t" "shll %%cl, %%edx \n\t"
"movzbl "LPS_STATE"(%2, %%eax), %%ecx \n\t" "movzbl "MANGLE(ff_h264_lps_state)"(%%eax), %%ecx \n\t"
"movb %%cl, (%1) \n\t" "movb %%cl, (%1) \n\t"
"addl $1, %%eax \n\t" "addl $1, %%eax \n\t"
"test %%bx, %%bx \n\t" "test %%bx, %%bx \n\t"
...@@ -475,7 +470,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){ ...@@ -475,7 +470,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
#endif #endif
//eax:state ebx:low edx:mask esi:range //eax:state ebx:low edx:mask esi:range
"movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t" "movzbl "MANGLE(ff_h264_mps_state)"(%%eax), %%ecx \n\t"
"movb %%cl, (%1) \n\t" "movb %%cl, (%1) \n\t"
"movl %%esi, %%edx \n\t" "movl %%esi, %%edx \n\t"
...@@ -523,12 +518,12 @@ static int get_cabac(CABACContext *c, uint8_t * const state){ ...@@ -523,12 +518,12 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
#ifndef BRANCHLESS_CABAC_DECODER #ifndef BRANCHLESS_CABAC_DECODER
if(c->low < c->range){ if(c->low < c->range){
bit= s&1; bit= s&1;
*state= c->mps_state[s]; *state= ff_h264_mps_state[s];
renorm_cabac_decoder_once(c); renorm_cabac_decoder_once(c);
}else{ }else{
bit= ff_h264_norm_shift[RangeLPS>>19]; bit= ff_h264_norm_shift[RangeLPS>>19];
c->low -= c->range; c->low -= c->range;
*state= c->lps_state[s]; *state= ff_h264_lps_state[s];
c->range = RangeLPS<<bit; c->range = RangeLPS<<bit;
c->low <<= bit; c->low <<= bit;
bit= (s&1)^1; bit= (s&1)^1;
...@@ -544,7 +539,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){ ...@@ -544,7 +539,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
c->range += (RangeLPS - c->range) & lps_mask; c->range += (RangeLPS - c->range) & lps_mask;
s^=lps_mask; s^=lps_mask;
*state= c->mps_state[s]; *state= ff_h264_mps_state[s];
bit= s&1; bit= s&1;
lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+3)]; lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+3)];
......
...@@ -7388,7 +7388,7 @@ static int decode_slice(H264Context *h){ ...@@ -7388,7 +7388,7 @@ static int decode_slice(H264Context *h){
align_get_bits( &s->gb ); align_get_bits( &s->gb );
/* init cabac */ /* init cabac */
ff_init_cabac_states( &h->cabac, ff_h264_mps_state, ff_h264_lps_state, 64 ); ff_init_cabac_states( &h->cabac);
ff_init_cabac_decoder( &h->cabac, ff_init_cabac_decoder( &h->cabac,
s->gb.buffer + get_bits_count(&s->gb)/8, s->gb.buffer + get_bits_count(&s->gb)/8,
( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8); ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment