shift CABACContext.range right, this reduces the number of shifts needed in...

Shift CABACContext.range right; this reduces the number of shifts needed in get_cabac() and is slightly faster on P3 (and should be much faster on P4, because the P4, except for its more recent variants, lacks an integer shifter, so shifts have ~10 times longer latency than simple operations like adds). git-svn-id: file:///var/local/repositories/ffmpeg/trunk@6702 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b

shift CABACContext.range right, this reduces the number of shifts needed in...
Shift CABACContext.range right; this reduces the number of shifts needed in get_cabac() and is slightly faster on P3 (and should be much faster on P4, because the P4, except for its more recent variants, lacks an integer shifter, so shifts have ~10 times longer latency than simple operations like adds). git-svn-id: file:///var/local/repositories/ffmpeg/trunk@6702 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
822d7a14 · michael · ac504942 · 822d7a14 · 822d7a14
Commit 822d7a14 authored Oct 15, 2006 by michael
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 77 additions and 52 deletions

libavcodec/cabac.c libavcodec/cabac.c +29 -6

libavcodec/cabac.h libavcodec/cabac.h +48 -46

No files found.
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -51,7 +51,7 @@ static const uint8_t lps_range[64][4]= {
 };

 uint8_t ff_h264_mlps_state[4*64];
-uint8_t ff_h264_lps_range[2*65][4];
+uint8_t ff_h264_lps_range[4][2*64];
 uint8_t ff_h264_lps_state[2*64];
 uint8_t ff_h264_mps_state[2*64];

@@ -76,8 +76,8 @@ static const uint8_t lps_state[64]= {
 33,33,34,34,35,35,35,36,
 36,36,37,37,37,38,38,63,
 };
-
-const uint8_t ff_h264_norm_shift[128]= {
+#if 0
+const uint8_t ff_h264_norm_shift_old[128]= {
 7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3,
 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -87,6 +87,29 @@ const uint8_t ff_h264_norm_shift[128]= {
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 };
+#endif
+const uint8_t ff_h264_norm_shift[512]= {
+ 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};

 /**
 *
@@ -121,7 +144,7 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
    c->low =  (*c->bytestream++)<<10;
 #endif
    c->low+= ((*c->bytestream++)<<2) + 2;
-    c->range= 0x1FE<<(CABAC_BITS + 1);
+    c->range= 0x1FE;
 }

 void ff_init_cabac_states(CABACContext *c){
@@ -129,8 +152,8 @@ void ff_init_cabac_states(CABACContext *c){

    for(i=0; i<64; i++){
        for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
-            ff_h264_lps_range[2*i+0][j+4]=
-            ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j];
+            ff_h264_lps_range[j][2*i+0]=
+            ff_h264_lps_range[j][2*i+1]= lps_range[i][j];
        }

        ff_h264_mlps_state[128+2*i+0]=

--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h