Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-gpu
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-gpu
Commits
fe175c11
Commit
fe175c11
authored
Jun 14, 2000
by
Brieuc Jeunhomme
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
réparation de mes bêtises sur la yuv (désolé tm), mais je remets ça bientôt.
parent
c2e97975
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
208 deletions
+34
-208
src/video_output/video_yuv.c
src/video_output/video_yuv.c
+34
-208
No files found.
src/video_output/video_yuv.c
View file @
fe175c11
...
@@ -15,12 +15,13 @@
...
@@ -15,12 +15,13 @@
*
*
* This program is distributed in the hope that it will be useful,
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* GNU General Public License for more details.
* General Public License for more details.
*
*
* You should have received a copy of the GNU General Public License
* You should have received a copy of the GNU General Public
* along with this program; if not, write to the Free Software
* License along with this program; if not, write to the
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*****************************************************************************/
*****************************************************************************/
/*****************************************************************************
/*****************************************************************************
...
@@ -72,26 +73,6 @@
...
@@ -72,26 +73,6 @@
#define V_RED_COEF ((int)(1.596 * (1<<SHIFT) / 1.164))
#define V_RED_COEF ((int)(1.596 * (1<<SHIFT) / 1.164))
#define V_GREEN_COEF ((int)(-0.813 * (1<<SHIFT) / 1.164))
#define V_GREEN_COEF ((int)(-0.813 * (1<<SHIFT) / 1.164))
#ifdef HAVE_MMX
/* hope these constant values are cache line aligned */
static
unsigned
long
long
mmx_80w
=
0x0080008000800080
;
static
unsigned
long
long
mmx_10w
=
0x1010101010101010
;
static
unsigned
long
long
mmx_00ffw
=
0x00ff00ff00ff00ff
;
static
unsigned
long
long
mmx_Y_coeff
=
0x253f253f253f253f
;
/* hope these constant values are cache line aligned */
static
unsigned
long
long
mmx_U_green
=
0xf37df37df37df37d
;
static
unsigned
long
long
mmx_U_blue
=
0x4093409340934093
;
static
unsigned
long
long
mmx_V_red
=
0x3312331233123312
;
static
unsigned
long
long
mmx_V_green
=
0xe5fce5fce5fce5fc
;
/* hope these constant values are cache line aligned */
static
unsigned
long
long
mmx_redmask
=
0xf8f8f8f8f8f8f8f8
;
static
unsigned
long
long
mmx_grnmask
=
0xfcfcfcfcfcfcfcfc
;
static
unsigned
long
long
mmx_grnshift
=
0x03
;
static
unsigned
long
long
mmx_blueshift
=
0x03
;
#endif
/*****************************************************************************
/*****************************************************************************
* Local prototypes
* Local prototypes
*****************************************************************************/
*****************************************************************************/
...
@@ -200,7 +181,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -200,7 +181,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither10[i_real_y]) >> 4) << 7) \
(((*p_y + dither10[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither20[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither20[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither20[i_real_y]) >> 5) ]; \
+ ((*p_v + dither20[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
...
@@ -208,15 +189,15 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -208,15 +189,15 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither11[i_real_y]) >> 4) << 7) \
(((*p_y + dither11[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither21[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither21[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither21[i_real_y]) >> 5) ]; \
+ ((*p_v + dither21[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
*p_pic++ = p_lookup[ \
*p_pic++ = p_lookup[ \
(((*p_y + dither12[i_real_y]) >> 4) << 7) \
(((*p_y + dither12[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither22[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither22[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither22[i_real_y])
>> 5) ]; \
+ ((*p_v + dither22[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
...
@@ -224,7 +205,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -224,7 +205,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither13[i_real_y]) >> 4) << 7) \
(((*p_y + dither13[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither23[i_real_y]) >> 5) * 9 \
+ ((*p_u + dither23[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither23[i_real_y]) >> 5) ]; \
+ ((*p_v + dither23[i_real_y]) >> 5) ]; \
b_jump_uv
+= *p_offset;
\
b_jump_uv
= (b_jump_uv + *p_offset) & 0x1;
\
p_y += *p_offset; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
...
@@ -312,13 +293,12 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -312,13 +293,12 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
* and 4 Bpp.
* and 4 Bpp.
*****************************************************************************/
*****************************************************************************/
#define SCALE_HEIGHT( CHROMA, BPP ) \
#define SCALE_HEIGHT( CHROMA, BPP ) \
\
/* If line is odd, rewind 4:2:0 U and V samples */
\
/* If line is odd, rewind 4:2:0 U and V samples */
\
/*if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) ) */
\
if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) )
\
/*{ */
\
{
\
/* p_u -= i_chroma_width; */
\
p_u -= i_chroma_width;
\
/* p_v -= i_chroma_width; */
\
p_v -= i_chroma_width;
\
/*} */
\
}
\
\
\
/* \
/* \
* Handle vertical scaling. The current line can be copied or next one \
* Handle vertical scaling. The current line can be copied or next one \
...
@@ -327,78 +307,32 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -327,78 +307,32 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
switch( i_vertical_scaling ) \
switch( i_vertical_scaling ) \
{ \
{ \
case -1:
/* vertical scaling factor is < 1 */
\
case -1:
/* vertical scaling factor is < 1 */
\
if( i_y & 0x1 ) \
{ \
while( (i_scale_count -= i_pic_height) >= 0 ) \
while( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
{ \
/* Height reduction: skip next source line */
\
/* Height reduction: skip next source line */
\
p_y += i_width; \
p_y += i_width; \
if( (CHROMA == 420) || (CHROMA == 422) ) \
{ \
if( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
p_y += i_width; \
i_y += 2; \
p_u += i_chroma_width; \
p_v += i_chroma_width; \
continue; \
} \
else \
{ \
i_y++; \
break; \
} \
} \
else if( CHROMA == 444 ) \
{ \
i_y++; \
i_y++; \
p_u += i_width; \
p_v += i_width; \
} \
} \
} \
else \
{ \
if( CHROMA == 420 || CHROMA == 422 ) \
{ \
p_u -= i_chroma_width; \
p_v -= i_chroma_width; \
} \
while( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
/* Height reduction: skip next source line */
\
p_y += i_width; \
if( (CHROMA == 420) || (CHROMA == 422) ) \
if( (CHROMA == 420) || (CHROMA == 422) ) \
{ \
if( i_y & 0x1 ) \
{ \
{ \
p_u += i_chroma_width; \
p_u += i_chroma_width; \
p_v += i_chroma_width; \
p_v += i_chroma_width; \
if( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
p_y += i_width; \
i_y+=2; \
continue; \
} \
else \
{ \
i_y++; \
break; \
} \
} \
} \
} \
else if( CHROMA == 444 ) \
else if( CHROMA == 444 ) \
{ \
{ \
i_y++; \
p_u += i_width; \
p_u += i_width; \
p_v += i_width; \
p_v += i_width; \
} \
} \
} \
} \
} \
i_scale_count += i_height; \
i_scale_count += i_height; \
break; \
break; \
case 1:
/* vertical scaling factor is > 1 */
\
case 1:
/* vertical scaling factor is > 1 */
\
while( (i_scale_count -= i_height) > 0 ) \
while( (i_scale_count -= i_height) > 0 ) \
{ \
{ \
/* Height increment: copy previous picture line */
\
/* Height increment: copy previous picture line */
\
for( i_x = i_pic_width
>> 4
; i_x--; ) \
for( i_x = i_pic_width
/ 16
; i_x--; ) \
{ \
{ \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
...
@@ -444,6 +378,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
...
@@ -444,6 +378,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
* Handle vertical scaling. The current line can be copied or next one \
* Handle vertical scaling. The current line can be copied or next one \
* can be ignored. \
* can be ignored. \
*/
\
*/
\
\
switch( i_vertical_scaling ) \
switch( i_vertical_scaling ) \
{ \
{ \
case -1:
/* vertical scaling factor is < 1 */
\
case -1:
/* vertical scaling factor is < 1 */
\
...
@@ -1168,16 +1103,15 @@ static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
...
@@ -1168,16 +1103,15 @@ static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
int
dither22
[
4
]
=
{
0x6
,
0x16
,
0x2
,
0x12
};
int
dither22
[
4
]
=
{
0x6
,
0x16
,
0x2
,
0x12
};
int
dither23
[
4
]
=
{
0x1e
,
0xe
,
0x1a
,
0xa
};
int
dither23
[
4
]
=
{
0x1e
,
0xe
,
0x1a
,
0xa
};
#if 0
/* other matrices that can be interesting, either for debugging or for effects */
/* other matrices that can be interesting, either for debugging or for
#if 0
* various effects */
int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} };
int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} };
int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
#endif
#endif
/*
/*
* Initialize some values - i_pic_line_width will store the line skip
* Initialize some values - i_pic_line_width will store the line skip
...
@@ -1352,15 +1286,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
...
@@ -1352,15 +1286,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
int
i_vertical_scaling
;
/* vertical scaling type */
int
i_vertical_scaling
;
/* vertical scaling type */
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_scale_count
;
/* scale modulo counter */
int
i_scale_count
;
/* scale modulo counter */
#ifndef HAVE_MMX
int
i_uval
,
i_vval
;
/* U and V samples */
int
i_uval
,
i_vval
;
/* U and V samples */
int
i_red
,
i_green
,
i_blue
;
/* U and V modified samples */
int
i_red
,
i_green
,
i_blue
;
/* U and V modified samples */
#endif
int
i_chroma_width
;
/* chroma width */
int
i_chroma_width
;
/* chroma width */
u16
*
p_yuv
;
/* base conversion table */
u16
*
p_yuv
;
/* base conversion table */
#ifndef HAVE_MMX
u16
*
p_ybase
;
/* Y dependant conversion table */
u16
*
p_ybase
;
/* Y dependant conversion table */
#endif
u16
*
p_pic_start
;
/* beginning of the current line for copy */
u16
*
p_pic_start
;
/* beginning of the current line for copy */
u16
*
p_buffer_start
;
/* conversion buffer start */
u16
*
p_buffer_start
;
/* conversion buffer start */
u16
*
p_buffer
;
/* conversion buffer pointer */
u16
*
p_buffer
;
/* conversion buffer pointer */
...
@@ -1389,9 +1319,6 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
...
@@ -1389,9 +1319,6 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
p_pic_start
=
p_pic
;
p_pic_start
=
p_pic
;
p_buffer
=
b_horizontal_scaling
?
p_buffer_start
:
p_pic
;
p_buffer
=
b_horizontal_scaling
?
p_buffer_start
:
p_pic
;
#ifndef HAVE_MMX
/* Do YUV conversion to buffer - YUV picture is always formed of 16
/* Do YUV conversion to buffer - YUV picture is always formed of 16
* pixels wide blocks */
* pixels wide blocks */
for
(
i_x
=
i_width
/
16
;
i_x
--
;
)
for
(
i_x
=
i_width
/
16
;
i_x
--
;
)
...
@@ -1405,112 +1332,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
...
@@ -1405,112 +1332,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
CONVERT_YUV_PIXEL
(
2
);
CONVERT_Y_PIXEL
(
2
);
}
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
}
#else
for
(
i_x
=
i_width
/
8
;
i_x
--
;
)
{
__asm__
(
"movd (%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0
\n\t
"
"movd (%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0
\n\t
"
"pxor %%mm4, %%mm4 # zero mm4
\n\t
"
"movq (%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
\n\t
"
//"movl $0, (%3) # cache preload for image\n\t"
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
));
__asm__
(
".align 8
\n\t
"
/* Do the multiply part of the conversion for even and odd pixels,
* register usage:
* mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
* mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
* mm6 -> Y even, mm7 -> Y odd */
/* convert the chroma part */
"punpcklbw %%mm4, %%mm0 # scatter 4 Cb 00 u3 00 u2 00 u1 00 u0
\n\t
"
"punpcklbw %%mm4, %%mm1 # scatter 4 Cr 00 v3 00 v2 00 v1 00 v0
\n\t
"
"psubsw mmx_80w, %%mm0 # Cb -= 128
\n\t
"
"psubsw mmx_80w, %%mm1 # Cr -= 128
\n\t
"
"psllw $3, %%mm0 # Promote precision
\n\t
"
"psllw $3, %%mm1 # Promote precision
\n\t
"
"movq %%mm0, %%mm2 # Copy 4 Cb 00 u3 00 u2 00 u1 00 u0
\n\t
"
"movq %%mm1, %%mm3 # Copy 4 Cr 00 v3 00 v2 00 v1 00 v0
\n\t
"
"pmulhw mmx_U_green, %%mm2# Mul Cb with green coeff -> Cb green
\n\t
"
"pmulhw mmx_V_green, %%mm3# Mul Cr with green coeff -> Cr green
\n\t
"
"pmulhw mmx_U_blue, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0
\n\t
"
"pmulhw mmx_V_red, %%mm1 # Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0
\n\t
"
"paddsw %%mm3, %%mm2 # Cb green + Cr green -> Cgreen
\n\t
"
/* convert the luma part */
"psubusb mmx_10w, %%mm6 # Y -= 16
\n\t
"
"movq %%mm6, %%mm7 # Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
\n\t
"
"pand mmx_00ffw, %%mm6 # get Y even 00 Y6 00 Y4 00 Y2 00 Y0
\n\t
"
"psrlw $8, %%mm7 # get Y odd 00 Y7 00 Y5 00 Y3 00 Y1
\n\t
"
"psllw $3, %%mm6 # Promote precision
\n\t
"
"psllw $3, %%mm7 # Promote precision
\n\t
"
"pmulhw mmx_Y_coeff, %%mm6# Mul 4 Y even 00 y6 00 y4 00 y2 00 y0
\n\t
"
"pmulhw mmx_Y_coeff, %%mm7# Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1
\n\t
"
/* Do the addition part of the conversion for even and odd pixels,
* register usage:
* mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
* mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
* mm6 -> Y even, mm7 -> Y odd */
/* Do horizontal and vertical scaling */
"movq %%mm0, %%mm3 # Copy Cblue
\n\t
"
"movq %%mm1, %%mm4 # Copy Cred
\n\t
"
"movq %%mm2, %%mm5 # Copy Cgreen
\n\t
"
"paddsw %%mm6, %%mm0 # Y even + Cblue 00 B6 00 B4 00 B2 00 B0
\n\t
"
"paddsw %%mm7, %%mm3 # Y odd + Cblue 00 B7 00 B5 00 B3 00 B1
\n\t
"
"paddsw %%mm6, %%mm1 # Y even + Cred 00 R6 00 R4 00 R2 00 R0
\n\t
"
"paddsw %%mm7, %%mm4 # Y odd + Cred 00 R7 00 R5 00 R3 00 R1
\n\t
"
"paddsw %%mm6, %%mm2 # Y even + Cgreen 00 G6 00 G4 00 G2 00 G0
\n\t
"
"paddsw %%mm7, %%mm5 # Y odd + Cgreen 00 G7 00 G5 00 G3 00 G1
\n\t
"
/* Limit RGB even to 0..255 */
"packuswb %%mm0, %%mm0 # B6 B4 B2 B0 | B6 B4 B2 B0
\n\t
"
"packuswb %%mm1, %%mm1 # R6 R4 R2 R0 | R6 R4 R2 R0
\n\t
"
"packuswb %%mm2, %%mm2 # G6 G4 G2 G0 | G6 G4 G2 G0
\n\t
"
/* Limit RGB odd to 0..255 */
"packuswb %%mm3, %%mm3 # B7 B5 B3 B1 | B7 B5 B3 B1
\n\t
"
"packuswb %%mm4, %%mm4 # R7 R5 R3 R1 | R7 R5 R3 R1
\n\t
"
"packuswb %%mm5, %%mm5 # G7 G5 G3 G1 | G7 G5 G3 G1
\n\t
"
/* Interleave RGB even and odd */
"punpcklbw %%mm3, %%mm0 # B7 B6 B5 B4 B3 B2 B1 B0
\n\t
"
"punpcklbw %%mm4, %%mm1 # R7 R6 R5 R4 R3 R2 R1 R0
\n\t
"
"punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0
\n\t
"
/* mask unneeded bits off */
"pand mmx_redmask, %%mm0# b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0
\n\t
"
"pand mmx_grnmask, %%mm2# g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0
\n\t
"
"pand mmx_redmask, %%mm1# r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0
\n\t
"
"psrlw mmx_blueshift,%%mm0#0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3
\n\t
"
"pxor %%mm4, %%mm4 # zero mm4
\n\t
"
"movq %%mm0, %%mm5 # Copy B7-B0
\n\t
"
"movq %%mm2, %%mm7 # Copy G7-G0
\n\t
"
/* convert rgb24 plane to rgb16 pack for pixel 0-3 */
"punpcklbw %%mm4, %%mm2 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0
\n\t
"
"punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3
\n\t
"
"psllw mmx_blueshift,%%mm2# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0
\n\t
"
"por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3
\n\t
"
"movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
\n\t
"
"movq %%mm0, (%3) # store pixel 0-3
\n\t
"
/* convert rgb24 plane to rgb16 pack for pixel 4-7 */
"punpckhbw %%mm4, %%mm7 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0
\n\t
"
"punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3
\n\t
"
"psllw mmx_blueshift,%%mm7# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0
\n\t
"
"movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0
\n\t
"
"por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3
\n\t
"
"movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0
\n\t
"
"movq %%mm5, 8(%3) # store pixel 4-7
\n\t
"
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
));
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Do horizontal and vertical scaling */
SCALE_WIDTH
;
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
SCALE_HEIGHT
(
420
,
2
);
}
}
__asm__
(
"emms
\n\t
"
);
#endif
}
}
/*****************************************************************************
/*****************************************************************************
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment