Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
fe175c11
Commit
fe175c11
authored
Jun 14, 2000
by
Brieuc Jeunhomme
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
réparation de mes betises sur la yuv (désolé tm), mais je remets ca bientot.
parent
c2e97975
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
208 deletions
+34
-208
src/video_output/video_yuv.c
src/video_output/video_yuv.c
+34
-208
No files found.
src/video_output/video_yuv.c
View file @
fe175c11
...
@@ -12,15 +12,16 @@
...
@@ -12,15 +12,16 @@
* it under the terms of the GNU General Public License as published by
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU
* G
NU G
eneral Public License for more details.
* General Public License for more details.
*
*
* You should have received a copy of the GNU General Public License
* You should have received a copy of the GNU General Public
* along with this program; if not, write to the Free Software
* License along with this program; if not, write to the
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*****************************************************************************/
*****************************************************************************/
/*****************************************************************************
/*****************************************************************************
...
@@ -72,26 +73,6 @@
...
@@ -72,26 +73,6 @@
#define V_RED_COEF ((int)(1.596 * (1<<SHIFT) / 1.164))
#define V_RED_COEF ((int)(1.596 * (1<<SHIFT) / 1.164))
#define V_GREEN_COEF ((int)(-0.813 * (1<<SHIFT) / 1.164))
#define V_GREEN_COEF ((int)(-0.813 * (1<<SHIFT) / 1.164))
#ifdef HAVE_MMX
/* hope these constant values are cache line aligned */
static
unsigned
long
long
mmx_80w
=
0x0080008000800080
;
static
unsigned
long
long
mmx_10w
=
0x1010101010101010
;
static
unsigned
long
long
mmx_00ffw
=
0x00ff00ff00ff00ff
;
static
unsigned
long
long
mmx_Y_coeff
=
0x253f253f253f253f
;
/* hope these constant values are cache line aligned */
static
unsigned
long
long
mmx_U_green
=
0xf37df37df37df37d
;
static
unsigned
long
long
mmx_U_blue
=
0x4093409340934093
;
static
unsigned
long
long
mmx_V_red
=
0x3312331233123312
;
static
unsigned
long
long
mmx_V_green
=
0xe5fce5fce5fce5fc
;
/* hope these constant values are cache line aligned */
static
unsigned
long
long
mmx_redmask
=
0xf8f8f8f8f8f8f8f8
;
static
unsigned
long
long
mmx_grnmask
=
0xfcfcfcfcfcfcfcfc
;
static
unsigned
long
long
mmx_grnshift
=
0x03
;
static
unsigned
long
long
mmx_blueshift
=
0x03
;
#endif
/*****************************************************************************
/*****************************************************************************
* Local prototypes
* Local prototypes
*****************************************************************************/
*****************************************************************************/
...
@@ -200,7 +181,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -200,7 +181,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither10[i_real_y]) >> 4) << 7) \
(((*p_y + dither10[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither20[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither20[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither20[i_real_y]) >> 5) ]; \
+ ((*p_v + dither20[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
...
@@ -208,15 +189,15 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -208,15 +189,15 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither11[i_real_y]) >> 4) << 7) \
(((*p_y + dither11[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither21[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither21[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither21[i_real_y]) >> 5) ]; \
+ ((*p_v + dither21[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
*p_pic++ = p_lookup[ \
*p_pic++ = p_lookup[ \
(((*p_y + dither12[i_real_y]) >> 4) << 7) \
(((*p_y + dither12[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither22[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither22[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither22[i_real_y])
>> 5) ]; \
+ ((*p_v + dither22[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
...
@@ -224,7 +205,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -224,7 +205,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither13[i_real_y]) >> 4) << 7) \
(((*p_y + dither13[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither23[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither23[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither23[i_real_y]) >> 5) ]; \
+ ((*p_v + dither23[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
...
@@ -312,13 +293,12 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -312,13 +293,12 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
* and 4 Bpp.
* and 4 Bpp.
*****************************************************************************/
*****************************************************************************/
#define SCALE_HEIGHT( CHROMA, BPP ) \
#define SCALE_HEIGHT( CHROMA, BPP ) \
\
/* If line is odd, rewind 4:2:0 U and V samples */
\
/* If line is odd, rewind 4:2:0 U and V samples */
\
/*if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) ) */
\
if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )
\
/*{ */
\
{
\
/* p_u -= i_chroma_width; */
\
p_u -= i_chroma_width;
\
/* p_v -= i_chroma_width; */
\
p_v -= i_chroma_width;
\
/*} */
\
}
\
\
\
/* \
/* \
* Handle vertical scaling. The current line can be copied or next one \
* Handle vertical scaling. The current line can be copied or next one \
...
@@ -327,70 +307,24 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -327,70 +307,24 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
switch( i_vertical_scaling ) \
switch( i_vertical_scaling ) \
{ \
{ \
case -1:
/* vertical scaling factor is < 1 */
\
case -1:
/* vertical scaling factor is < 1 */
\
if( i_y & 0x1 )
\
while( (i_scale_count -= i_pic_height) >= 0 )
\
{ \
{ \
while( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
/* Height reduction: skip next source line */
\
/* Height reduction: skip next source line */
\
p_y += i_width; \
p_y += i_width; \
if( (CHROMA == 420) || (CHROMA == 422) ) \
i_y++; \
{ \
if( (CHROMA == 420) || (CHROMA == 422) ) \
if( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
p_y += i_width; \
i_y += 2; \
p_u += i_chroma_width; \
p_v += i_chroma_width; \
continue; \
} \
else \
{ \
i_y++; \
break; \
} \
} \
else if( CHROMA == 444 ) \
{ \
i_y++; \
p_u += i_width; \
p_v += i_width; \
} \
} \
} \
else \
{ \
if( CHROMA == 420 || CHROMA == 422 ) \
{ \
p_u -= i_chroma_width; \
p_v -= i_chroma_width; \
} \
while( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
{ \
/* Height reduction: skip next source line */
\
if( i_y & 0x1 ) \
p_y += i_width; \
if( (CHROMA == 420) || (CHROMA == 422) ) \
{ \
{ \
p_u += i_chroma_width; \
p_u += i_chroma_width; \
p_v += i_chroma_width; \
p_v += i_chroma_width; \
if( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
p_y += i_width; \
i_y+=2; \
continue; \
} \
else \
{ \
i_y++; \
break; \
} \
} \
else if( CHROMA == 444 ) \
{ \
i_y++; \
p_u += i_width; \
p_v += i_width; \
} \
} \
} \
} \
else if( CHROMA == 444 ) \
{ \
p_u += i_width; \
p_v += i_width; \
} \
} \
} \
i_scale_count += i_height; \
i_scale_count += i_height; \
break; \
break; \
...
@@ -398,7 +332,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -398,7 +332,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
while( (i_scale_count -= i_height) > 0 ) \
while( (i_scale_count -= i_height) > 0 ) \
{ \
{ \
/* Height increment: copy previous picture line */
\
/* Height increment: copy previous picture line */
\
for( i_x = i_pic_width
>> 4
; i_x--; ) \
for( i_x = i_pic_width
/ 16
; i_x--; ) \
{ \
{ \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
...
@@ -444,6 +378,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -444,6 +378,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
* Handle vertical scaling. The current line can be copied or next one \
* Handle vertical scaling. The current line can be copied or next one \
* can be ignored. \
* can be ignored. \
*/
\
*/
\
\
switch( i_vertical_scaling ) \
switch( i_vertical_scaling ) \
{ \
{ \
case -1:
/* vertical scaling factor is < 1 */
\
case -1:
/* vertical scaling factor is < 1 */
\
...
@@ -1168,16 +1103,15 @@ static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
...
@@ -1168,16 +1103,15 @@ static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
int
dither22
[
4
]
=
{
0x6
,
0x16
,
0x2
,
0x12
};
int
dither22
[
4
]
=
{
0x6
,
0x16
,
0x2
,
0x12
};
int
dither23
[
4
]
=
{
0x1e
,
0xe
,
0x1a
,
0xa
};
int
dither23
[
4
]
=
{
0x1e
,
0xe
,
0x1a
,
0xa
};
#if 0
/* other matrices that can be interesting, either for debugging or for effects */
/* other matrices that can be interesting, either for debugging or for
#if 0
* various effects */
int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} };
int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} };
int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
#endif
#endif
/*
/*
* Initialize some values - i_pic_line_width will store the line skip
* Initialize some values - i_pic_line_width will store the line skip
...
@@ -1352,15 +1286,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
...
@@ -1352,15 +1286,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
int
i_vertical_scaling
;
/* vertical scaling type */
int
i_vertical_scaling
;
/* vertical scaling type */
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_scale_count
;
/* scale modulo counter */
int
i_scale_count
;
/* scale modulo counter */
#ifndef HAVE_MMX
int
i_uval
,
i_vval
;
/* U and V samples */
int
i_uval
,
i_vval
;
/* U and V samples */
int
i_red
,
i_green
,
i_blue
;
/* U and V modified samples */
int
i_red
,
i_green
,
i_blue
;
/* U and V modified samples */
#endif
int
i_chroma_width
;
/* chroma width */
int
i_chroma_width
;
/* chroma width */
u16
*
p_yuv
;
/* base conversion table */
u16
*
p_yuv
;
/* base conversion table */
#ifndef HAVE_MMX
u16
*
p_ybase
;
/* Y dependant conversion table */
u16
*
p_ybase
;
/* Y dependant conversion table */
#endif
u16
*
p_pic_start
;
/* beginning of the current line for copy */
u16
*
p_pic_start
;
/* beginning of the current line for copy */
u16
*
p_buffer_start
;
/* conversion buffer start */
u16
*
p_buffer_start
;
/* conversion buffer start */
u16
*
p_buffer
;
/* conversion buffer pointer */
u16
*
p_buffer
;
/* conversion buffer pointer */
...
@@ -1389,9 +1319,6 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
...
@@ -1389,9 +1319,6 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
p_pic_start
=
p_pic
;
p_pic_start
=
p_pic
;
p_buffer
=
b_horizontal_scaling
?
p_buffer_start
:
p_pic
;
p_buffer
=
b_horizontal_scaling
?
p_buffer_start
:
p_pic
;
#ifndef HAVE_MMX
/* Do YUV conversion to buffer - YUV picture is always formed of 16
/* Do YUV conversion to buffer - YUV picture is always formed of 16
* pixels wide blocks */
* pixels wide blocks */
for
(
i_x
=
i_width
/
16
;
i_x
--
;
)
for
(
i_x
=
i_width
/
16
;
i_x
--
;
)
...
@@ -1405,112 +1332,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
...
@@ -1405,112 +1332,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
}
}
/* Do horizontal and vertical scaling */
SCALE_WIDTH
;
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
SCALE_HEIGHT
(
420
,
2
);
}
}
#else
for
(
i_x
=
i_width
/
8
;
i_x
--
;
)
{
__asm__
(
"movd (%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0
\n\t
"
"movd (%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0
\n\t
"
"pxor %%mm4, %%mm4 # zero mm4
\n\t
"
"movq (%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
\n\t
"
//"movl $0, (%3) # cache preload for image\n\t"
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
));
__asm__
(
".align 8
\n\t
"
/* Do the multiply part of the conversion for even and odd pixels,
* register usage:
* mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
* mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
* mm6 -> Y even, mm7 -> Y odd */
/* convert the chroma part */
"punpcklbw %%mm4, %%mm0 # scatter 4 Cb 00 u3 00 u2 00 u1 00 u0
\n\t
"
"punpcklbw %%mm4, %%mm1 # scatter 4 Cr 00 v3 00 v2 00 v1 00 v0
\n\t
"
"psubsw mmx_80w, %%mm0 # Cb -= 128
\n\t
"
"psubsw mmx_80w, %%mm1 # Cr -= 128
\n\t
"
"psllw $3, %%mm0 # Promote precision
\n\t
"
"psllw $3, %%mm1 # Promote precision
\n\t
"
"movq %%mm0, %%mm2 # Copy 4 Cb 00 u3 00 u2 00 u1 00 u0
\n\t
"
"movq %%mm1, %%mm3 # Copy 4 Cr 00 v3 00 v2 00 v1 00 v0
\n\t
"
"pmulhw mmx_U_green, %%mm2# Mul Cb with green coeff -> Cb green
\n\t
"
"pmulhw mmx_V_green, %%mm3# Mul Cr with green coeff -> Cr green
\n\t
"
"pmulhw mmx_U_blue, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0
\n\t
"
"pmulhw mmx_V_red, %%mm1 # Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0
\n\t
"
"paddsw %%mm3, %%mm2 # Cb green + Cr green -> Cgreen
\n\t
"
/* convert the luma part */
"psubusb mmx_10w, %%mm6 # Y -= 16
\n\t
"
"movq %%mm6, %%mm7 # Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
\n\t
"
"pand mmx_00ffw, %%mm6 # get Y even 00 Y6 00 Y4 00 Y2 00 Y0
\n\t
"
"psrlw $8, %%mm7 # get Y odd 00 Y7 00 Y5 00 Y3 00 Y1
\n\t
"
"psllw $3, %%mm6 # Promote precision
\n\t
"
"psllw $3, %%mm7 # Promote precision
\n\t
"
"pmulhw mmx_Y_coeff, %%mm6# Mul 4 Y even 00 y6 00 y4 00 y2 00 y0
\n\t
"
"pmulhw mmx_Y_coeff, %%mm7# Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1
\n\t
"
/* Do the addition part of the conversion for even and odd pixels,
* register usage:
* mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
* mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
* mm6 -> Y even, mm7 -> Y odd */
/* Do horizontal and vertical scaling */
"movq %%mm0, %%mm3 # Copy Cblue
\n\t
"
"movq %%mm1, %%mm4 # Copy Cred
\n\t
"
"movq %%mm2, %%mm5 # Copy Cgreen
\n\t
"
"paddsw %%mm6, %%mm0 # Y even + Cblue 00 B6 00 B4 00 B2 00 B0
\n\t
"
"paddsw %%mm7, %%mm3 # Y odd + Cblue 00 B7 00 B5 00 B3 00 B1
\n\t
"
"paddsw %%mm6, %%mm1 # Y even + Cred 00 R6 00 R4 00 R2 00 R0
\n\t
"
"paddsw %%mm7, %%mm4 # Y odd + Cred 00 R7 00 R5 00 R3 00 R1
\n\t
"
"paddsw %%mm6, %%mm2 # Y even + Cgreen 00 G6 00 G4 00 G2 00 G0
\n\t
"
"paddsw %%mm7, %%mm5 # Y odd + Cgreen 00 G7 00 G5 00 G3 00 G1
\n\t
"
/* Limit RGB even to 0..255 */
"packuswb %%mm0, %%mm0 # B6 B4 B2 B0 | B6 B4 B2 B0
\n\t
"
"packuswb %%mm1, %%mm1 # R6 R4 R2 R0 | R6 R4 R2 R0
\n\t
"
"packuswb %%mm2, %%mm2 # G6 G4 G2 G0 | G6 G4 G2 G0
\n\t
"
/* Limit RGB odd to 0..255 */
"packuswb %%mm3, %%mm3 # B7 B5 B3 B1 | B7 B5 B3 B1
\n\t
"
"packuswb %%mm4, %%mm4 # R7 R5 R3 R1 | R7 R5 R3 R1
\n\t
"
"packuswb %%mm5, %%mm5 # G7 G5 G3 G1 | G7 G5 G3 G1
\n\t
"
/* Interleave RGB even and odd */
"punpcklbw %%mm3, %%mm0 # B7 B6 B5 B4 B3 B2 B1 B0
\n\t
"
"punpcklbw %%mm4, %%mm1 # R7 R6 R5 R4 R3 R2 R1 R0
\n\t
"
"punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0
\n\t
"
/* mask unneeded bits off */
"pand mmx_redmask, %%mm0# b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0
\n\t
"
"pand mmx_grnmask, %%mm2# g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0
\n\t
"
"pand mmx_redmask, %%mm1# r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0
\n\t
"
"psrlw mmx_blueshift,%%mm0#0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3
\n\t
"
"pxor %%mm4, %%mm4 # zero mm4
\n\t
"
"movq %%mm0, %%mm5 # Copy B7-B0
\n\t
"
"movq %%mm2, %%mm7 # Copy G7-G0
\n\t
"
/* convert rgb24 plane to rgb16 pack for pixel 0-3 */
"punpcklbw %%mm4, %%mm2 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0
\n\t
"
"punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3
\n\t
"
"psllw mmx_blueshift,%%mm2# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0
\n\t
"
"por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3
\n\t
"
"movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
\n\t
"
"movq %%mm0, (%3) # store pixel 0-3
\n\t
"
/* convert rgb24 plane to rgb16 pack for pixel 0-3 */
"punpckhbw %%mm4, %%mm7 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0
\n\t
"
"punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3
\n\t
"
"psllw mmx_blueshift,%%mm7# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0
\n\t
"
"movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0
\n\t
"
"por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3
\n\t
"
"movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0
\n\t
"
"movq %%mm5, 8(%3) # store pixel 4-7
\n\t
"
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
));
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
}
__asm__
(
"emms
\n\t
"
);
#endif
}
}
/*****************************************************************************
/*****************************************************************************
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment