Commit 0615d6b6 authored by Sam Hocevar

* removed an unused variable in the MMX YUVs.

  * fixed 32bpp MMX YUV, made the comments clearer, removed an emms.
parent 65d341df
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
* removed all "*vlc" aliases except "gvlc" and "fbvlc". The other sucked. * removed all "*vlc" aliases except "gvlc" and "fbvlc". The other sucked.
* new --synchro flag which lets you force which images are decoded. * new --synchro flag which lets you force which images are decoded.
* removed an unused variable in the MMX YUVs.
* fixed 32bpp MMX YUV, made the comments clearer, removed an emms.
Tue Aug 8 11:24:01 CEST 2000 Tue Aug 8 11:24:01 CEST 2000
0.1.99f : 0.1.99f :
......
...@@ -140,7 +140,7 @@ Difficulty: Medium ...@@ -140,7 +140,7 @@ Difficulty: Medium
Urgency: Important Urgency: Important
Description: Fix 32bpp MMX YUV Description: Fix 32bpp MMX YUV
The MMX 32bpp YUV function is buggy. The MMX 32bpp YUV function is buggy.
Status: Todo Status: Done 13 Aug 2000 (sam)
Task: 0x11 Task: 0x11
Difficulty: Hard Difficulty: Hard
......
...@@ -343,7 +343,8 @@ void SetYUV( vout_thread_t *p_vout ) ...@@ -343,7 +343,8 @@ void SetYUV( vout_thread_t *p_vout )
* It will also set horizontal and vertical scaling indicators. * It will also set horizontal and vertical scaling indicators.
*****************************************************************************/ *****************************************************************************/
void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height, void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height,
boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset ) boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset,
boolean_t b_double )
{ {
int i_x; /* x position in destination */ int i_x; /* x position in destination */
int i_scale_count; /* modulo counter */ int i_scale_count; /* modulo counter */
......
...@@ -34,21 +34,15 @@ ...@@ -34,21 +34,15 @@
#define PALETTE_TABLE_SIZE 2176 /* YUV -> 8bpp palette lookup table */ #define PALETTE_TABLE_SIZE 2176 /* YUV -> 8bpp palette lookup table */
/* argument lists for YUV functions */ /* argument lists for YUV functions */
#define YUV_ARGS_8BPP p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, \ #define YUV_ARGS( word_size ) p_vout_thread_t p_vout, word_size *p_pic, \
yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \ yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, \
int i_pic_height, int i_pic_line_width, int i_matrix_coefficients int i_pic_width, int i_pic_height, int i_pic_line_width, \
int i_matrix_coefficients
#define YUV_ARGS_16BPP p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, \ #define YUV_ARGS_8BPP YUV_ARGS( u8 )
yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \ #define YUV_ARGS_16BPP YUV_ARGS( u16 )
int i_pic_height, int i_pic_line_width, int i_matrix_coefficients #define YUV_ARGS_24BPP YUV_ARGS( u32 )
#define YUV_ARGS_32BPP YUV_ARGS( u32 )
#define YUV_ARGS_24BPP p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, \
yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \
int i_pic_height, int i_pic_line_width, int i_matrix_coefficients
#define YUV_ARGS_32BPP p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, \
yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \
int i_pic_height, int i_pic_line_width, int i_matrix_coefficients
/***************************************************************************** /*****************************************************************************
* Local prototypes * Local prototypes
...@@ -56,7 +50,8 @@ int i_pic_height, int i_pic_line_width, int i_matrix_coefficients ...@@ -56,7 +50,8 @@ int i_pic_height, int i_pic_line_width, int i_matrix_coefficients
void SetYUV ( vout_thread_t *p_vout ); void SetYUV ( vout_thread_t *p_vout );
void SetOffset ( int i_width, int i_height, int i_pic_width, void SetOffset ( int i_width, int i_height, int i_pic_width,
int i_pic_height, boolean_t *pb_h_scaling, int i_pic_height, boolean_t *pb_h_scaling,
int *pi_v_scaling, int *p_offset ); int *pi_v_scaling, int *p_offset,
boolean_t b_double );
void ConvertY4Gray8 ( YUV_ARGS_8BPP ); void ConvertY4Gray8 ( YUV_ARGS_8BPP );
void ConvertYUV420RGB8 ( YUV_ARGS_8BPP ); void ConvertYUV420RGB8 ( YUV_ARGS_8BPP );
......
...@@ -65,7 +65,6 @@ void ConvertYUV420RGB16( YUV_ARGS_16BPP ) ...@@ -65,7 +65,6 @@ void ConvertYUV420RGB16( YUV_ARGS_16BPP )
int i_x, i_y; /* horizontal and vertical indexes */ int i_x, i_y; /* horizontal and vertical indexes */
int i_scale_count; /* scale modulo counter */ int i_scale_count; /* scale modulo counter */
int i_chroma_width; /* chroma width */ int i_chroma_width; /* chroma width */
u16 * p_yuv; /* base conversion table */
u16 * p_pic_start; /* beginning of the current line for copy */ u16 * p_pic_start; /* beginning of the current line for copy */
u16 * p_buffer_start; /* conversion buffer start */ u16 * p_buffer_start; /* conversion buffer start */
u16 * p_buffer; /* conversion buffer pointer */ u16 * p_buffer; /* conversion buffer pointer */
...@@ -77,11 +76,10 @@ void ConvertYUV420RGB16( YUV_ARGS_16BPP ) ...@@ -77,11 +76,10 @@ void ConvertYUV420RGB16( YUV_ARGS_16BPP )
*/ */
i_pic_line_width -= i_pic_width; i_pic_line_width -= i_pic_width;
i_chroma_width = i_width / 2; i_chroma_width = i_width / 2;
p_yuv = p_vout->yuv.yuv.p_rgb16;
p_buffer_start = p_vout->yuv.p_buffer; p_buffer_start = p_vout->yuv.p_buffer;
p_offset_start = p_vout->yuv.p_offset; p_offset_start = p_vout->yuv.p_offset;
SetOffset( i_width, i_height, i_pic_width, i_pic_height, SetOffset( i_width, i_height, i_pic_width, i_pic_height,
&b_horizontal_scaling, &i_vertical_scaling, p_offset_start ); &b_horizontal_scaling, &i_vertical_scaling, p_offset_start, 0 );
/* /*
* Perform conversion * Perform conversion
...@@ -114,7 +112,6 @@ void ConvertYUV420RGB16( YUV_ARGS_16BPP ) ...@@ -114,7 +112,6 @@ void ConvertYUV420RGB16( YUV_ARGS_16BPP )
SCALE_WIDTH; SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 ); SCALE_HEIGHT( 420, 2 );
} }
__asm__( "emms" );
} }
/***************************************************************************** /*****************************************************************************
......
...@@ -65,7 +65,6 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP ) ...@@ -65,7 +65,6 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP )
int i_x, i_y; /* horizontal and vertical indexes */ int i_x, i_y; /* horizontal and vertical indexes */
int i_scale_count; /* scale modulo counter */ int i_scale_count; /* scale modulo counter */
int i_chroma_width; /* chroma width */ int i_chroma_width; /* chroma width */
u32 * p_yuv; /* base conversion table */
u32 * p_pic_start; /* beginning of the current line for copy */ u32 * p_pic_start; /* beginning of the current line for copy */
u32 * p_buffer_start; /* conversion buffer start */ u32 * p_buffer_start; /* conversion buffer start */
u32 * p_buffer; /* conversion buffer pointer */ u32 * p_buffer; /* conversion buffer pointer */
...@@ -77,11 +76,10 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP ) ...@@ -77,11 +76,10 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP )
*/ */
i_pic_line_width -= i_pic_width; i_pic_line_width -= i_pic_width;
i_chroma_width = i_width / 2; i_chroma_width = i_width / 2;
p_yuv = p_vout->yuv.yuv.p_rgb32;
p_buffer_start = p_vout->yuv.p_buffer; p_buffer_start = p_vout->yuv.p_buffer;
p_offset_start = p_vout->yuv.p_offset; p_offset_start = p_vout->yuv.p_offset;
SetOffset( i_width, i_height, i_pic_width, i_pic_height, SetOffset( i_width, i_height, i_pic_width, i_pic_height,
&b_horizontal_scaling, &i_vertical_scaling, p_offset_start ); &b_horizontal_scaling, &i_vertical_scaling, p_offset_start, 0 );
/* /*
* Perform conversion * Perform conversion
...@@ -96,10 +94,14 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP ) ...@@ -96,10 +94,14 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP )
for ( i_x = i_width / 8; i_x--; ) for ( i_x = i_width / 8; i_x--; )
{ {
__asm__( ".align 8" MMX_INIT_32 __asm__( ".align 8"
MMX_INIT_32
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) ); : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
__asm__( ".align 8" MMX_YUV_ADD MMX_YUV_MUL MMX_UNPACK_32 __asm__( ".align 8"
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) ); : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
p_y += 8; p_y += 8;
...@@ -109,9 +111,8 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP ) ...@@ -109,9 +111,8 @@ void ConvertYUV420RGB32( YUV_ARGS_32BPP )
} }
SCALE_WIDTH; SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 ); SCALE_HEIGHT( 420, 4 );
} }
__asm__( "emms" );
} }
/***************************************************************************** /*****************************************************************************
......
...@@ -133,32 +133,39 @@ punpcklbw %%mm4, %%mm1 # R7 R6 R5 R4 R3 R2 R1 R0 \n\ ...@@ -133,32 +133,39 @@ punpcklbw %%mm4, %%mm1 # R7 R6 R5 R4 R3 R2 R1 R0 \n\
punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0 \n\ punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0 \n\
" "
/*
* convert RGB plane to RGB 16 bits,
* mm0 -> B, mm1 -> R, mm2 -> G,
* mm4 -> GB, mm5 -> AR pixel 4-7,
* mm6 -> GB, mm7 -> AR pixel 0-3
*/
#define MMX_UNPACK_16 " \n\ #define MMX_UNPACK_16 " \n\
\n\ \n\
# mask unneeded bits off \n\ # mask unneeded bits off \n\
pand mmx_redmask, %%mm0 # b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 \n\ pand mmx_redmask, %%mm0 # b7b6b5b4 b3______ b7b6b5b4 b3______ \n\
pand mmx_grnmask, %%mm2 # g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 \n\ pand mmx_grnmask, %%mm2 # g7g6g5g4 g3g2____ g7g6g5g4 g3g2____ \n\
pand mmx_redmask, %%mm1 # r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 \n\ pand mmx_redmask, %%mm1 # r7r6r5r4 r3______ r7r6r5r4 r3______ \n\
psrlw mmx_blueshift,%%mm0 # 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 \n\ psrlw mmx_blueshift,%%mm0 # ______b7 b6b5b4b3 ______b7 b6b5b4b3 \n\
pxor %%mm4, %%mm4 # zero mm4 \n\ pxor %%mm4, %%mm4 # zero mm4 \n\
movq %%mm0, %%mm5 # Copy B7-B0 \n\ movq %%mm0, %%mm5 # Copy B7-B0 \n\
movq %%mm2, %%mm7 # Copy G7-G0 \n\ movq %%mm2, %%mm7 # Copy G7-G0 \n\
\n\ \n\
# convert rgb24 plane to rgb16 pack for pixel 0-3 \n\ # convert rgb24 plane to rgb16 pack for pixel 0-3 \n\
punpcklbw %%mm4, %%mm2 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 \n\ punpcklbw %%mm4, %%mm2 # ________ ________ g7g6g5g4 g3g2____ \n\
punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 \n\ punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3______ ______b7 b6b5b4b3 \n\
psllw mmx_blueshift,%%mm2 # 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 \n\ psllw mmx_blueshift,%%mm2 # ________ __g7g6g5 g4g3g2__ ________ \n\
por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\ por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\
movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\ movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
movq %%mm0, (%3) # store pixel 0-3 \n\ movq %%mm0, (%3) # store pixel 0-3 \n\
\n\ \n\
# convert rgb24 plane to rgb16 pack for pixel 4-7 \n\ # convert rgb24 plane to rgb16 pack for pixel 4-7 \n\
punpckhbw %%mm4, %%mm7 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 \n\ punpckhbw %%mm4, %%mm7 # ________ ________ g7g6g5g4 g3g2____ \n\
punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 \n\ punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3______ ______b7 b6b5b4b3 \n\
psllw mmx_blueshift,%%mm7 # 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 \n\ psllw mmx_blueshift,%%mm7 # ________ __g7g6g5 g4g3g2__ ________ \n\
movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\ movd 4(%1), %%mm0 # Load 4 Cb __ __ __ __ u3 u2 u1 u0 \n\
por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\ por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\
movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\ movd 4(%2), %%mm1 # Load 4 Cr __ __ __ __ v3 v2 v1 v0 \n\
movq %%mm5, 8(%3) # store pixel 4-7 \n\ movq %%mm5, 8(%3) # store pixel 4-7 \n\
" "
...@@ -192,9 +199,9 @@ movq %%mm0, %%mm4 # B7 B6 B5 B4 B3 B2 B1 B0 \n\ ...@@ -192,9 +199,9 @@ movq %%mm0, %%mm4 # B7 B6 B5 B4 B3 B2 B1 B0 \n\
punpckhbw %%mm2, %%mm4 # G7 B7 G6 B6 G5 B5 G4 B4 \n\ punpckhbw %%mm2, %%mm4 # G7 B7 G6 B6 G5 B5 G4 B4 \n\
punpckhwd %%mm5, %%mm4 # 00 R7 G7 B7 00 R6 G6 B6 \n\ punpckhwd %%mm5, %%mm4 # 00 R7 G7 B7 00 R6 G6 B6 \n\
movq %%mm4, 24(%3) # Store ARGB7 ARGB6 \n\ movq %%mm4, 24(%3) # Store ARGB7 ARGB6 \n\
movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\ \n\
movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\ #movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\
pxor %%mm4, %%mm4 # zero mm4 \n\ #movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\
movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\ #pxor %%mm4, %%mm4 # zero mm4 \n\
#movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
" "
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment