Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
5e4dc54c
Commit
5e4dc54c
authored
Aug 02, 2007
by
Damien Fouilleul
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
chromas: more SSE2/MMX fixes, added I420_RGBA conversion
parent
c23c9ae9
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
575 additions
and
171 deletions
+575
-171
modules/video_chroma/i420_rgb.c
modules/video_chroma/i420_rgb.c
+1
-2
modules/video_chroma/i420_rgb.h
modules/video_chroma/i420_rgb.h
+1
-0
modules/video_chroma/i420_rgb16.c
modules/video_chroma/i420_rgb16.c
+239
-0
modules/video_chroma/i420_rgb_mmx.h
modules/video_chroma/i420_rgb_mmx.h
+316
-151
modules/video_chroma/i420_yuy2.h
modules/video_chroma/i420_yuy2.h
+18
-18
No files found.
modules/video_chroma/i420_rgb.c
View file @
5e4dc54c
...
...
@@ -161,8 +161,7 @@ static int Activate( vlc_object_t *p_this )
{
/* R8G8B8A8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is R8G8B8A8"
);
//p_vout->chroma.pf_convert = E_(I420_B8G8R8A8);
return
-
1
;
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_R8G8B8A8
);
}
else
if
(
p_vout
->
output
.
i_rmask
==
0x0000ff00
&&
p_vout
->
output
.
i_gmask
==
0x00ff0000
...
...
modules/video_chroma/i420_rgb.h
View file @
5e4dc54c
...
...
@@ -64,6 +64,7 @@ void E_(I420_RGB32) ( vout_thread_t *, picture_t *, picture_t * );
void
E_
(
I420_R5G5B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_R5G6B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8R8G8B8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_R8G8B8A8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_B8G8R8A8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8B8G8R8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#endif
...
...
modules/video_chroma/i420_rgb16.c
View file @
5e4dc54c
...
...
@@ -1140,6 +1140,245 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
#endif
}
void
E_
(
I420_R8G8B8A8
)(
vout_thread_t
*
p_vout
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
vlc_bool_t
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_vout
->
render
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_vout
->
chroma
.
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_vout
->
chroma
.
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_vout
->
render
.
i_width
,
p_vout
->
render
.
i_height
,
p_vout
->
output
.
i_width
,
p_vout
->
output
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_vout
->
output
.
i_height
:
p_vout
->
render
.
i_height
;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
if
(
p_vout
->
render
.
i_width
&
15
)
{
i_rewind
=
16
-
(
p_vout
->
render
.
i_width
&
15
);
}
else
{
i_rewind
=
0
;
}
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
int
)
p_y
)
|
((
int
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_vout
->
render
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_vout
->
render
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
if
(
p_vout
->
render
.
i_width
&
7
)
{
i_rewind
=
8
-
(
p_vout
->
render
.
i_width
&
7
);
}
else
{
i_rewind
=
0
;
}
for
(
i_y
=
0
;
i_y
<
p_vout
->
render
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_vout
->
render
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_RGBA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_RGBA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
void
E_
(
I420_B8G8R8A8
)(
vout_thread_t
*
p_vout
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
...
...
modules/video_chroma/i420_rgb_mmx.h
View file @
5e4dc54c
This diff is collapsed.
Click to expand it.
modules/video_chroma/i420_yuy2.h
View file @
5e4dc54c
...
...
@@ -138,56 +138,56 @@ movq %%mm1, (%1) # Store YUYV \n\
#define MMX_END _mm_empty()
#define MMX_YUV420_YUYV \
mm1 = _mm_cvtsi32_si64(
(int)*p_u);
\
mm2 = _mm_cvtsi32_si64(
(int)*p_v);
\
mm1 = _mm_cvtsi32_si64(
*(int*)p_u);
\
mm2 = _mm_cvtsi32_si64(
*(int*)p_v);
\
mm0 = (__m64)*(uint64_t*)p_y1; \
mm3 = (__m64)*(uint64_t*)p_y2; \
mm1 = _mm_unpacklo_pi8(mm1, mm2); \
mm2 = mm0; \
mm2 = _mm_unpacklo_pi8(mm2, mm1); \
*(uin
64_t)p_line1 = (uint64)mm2;
\
*(uin
t64_t*)p_line1 = (uint64_t)mm2;
\
mm0 = _mm_unpackhi_pi8(mm0, mm1); \
*(uin
64_t)(p_line1 + 4) = (uint64)mm0;
\
*(uin
t64_t*)(p_line1+8) = (uint64_t)mm0;
\
mm4 = mm3; \
mm4 = _mm_unpacklo_pi8(mm4, mm1); \
*(uin
64_t)p_line2 = (uint64)mm4;
\
*(uin
t64_t*)p_line2 = (uint64_t)mm4;
\
mm3 = _mm_unpackhi_pi8(mm3, mm1); \
*(uin
64_t)(p_line2 + 4) = (uint64)mm4
;
*(uin
t64_t*)(p_line2+8) = (uint64_t)mm3
;
#define MMX_YUV420_YVYU \
mm2 = _mm_cvtsi32_si64(
(int)*p_u);
\
mm1 = _mm_cvtsi32_si64(
(int)*p_v);
\
mm2 = _mm_cvtsi32_si64(
*(int*)p_u);
\
mm1 = _mm_cvtsi32_si64(
*(int*)p_v);
\
mm0 = (__m64)*(uint64_t*)p_y1; \
mm3 = (__m64)*(uint64_t*)p_y2; \
mm1 = _mm_unpacklo_pi8(mm1, mm2); \
mm2 = mm0; \
mm2 = _mm_unpacklo_pi8(mm2, mm1); \
*(uin
64_t)p_line1 = (uint64)mm2;
\
*(uin
t64_t*)p_line1 = (uint64_t)mm2;
\
mm0 = _mm_unpackhi_pi8(mm0, mm1); \
*(uin
64_t)(p_line1 + 4) = (uint64)mm0;
\
*(uin
t64_t*)(p_line1+8) = (uint64_t)mm0;
\
mm4 = mm3; \
mm4 = _mm_unpacklo_pi8(mm4, mm1); \
*(uin
64_t)p_line2 = (uint64)mm4;
\
*(uin
t64_t*)p_line2 = (uint64_t)mm4;
\
mm3 = _mm_unpackhi_pi8(mm3, mm1); \
*(uin
64_t)(p_line2 + 4) = (uint64)mm4
;
*(uin
t64_t*)(p_line2+8) = (uint64_t)mm3
;
#define MMX_YUV420_UYVY \
mm1 = _mm_cvtsi32_si64(
(int)*p_u);
\
mm2 = _mm_cvtsi32_si64(
(int)*p_v);
\
mm1 = _mm_cvtsi32_si64(
*(int*)p_u);
\
mm2 = _mm_cvtsi32_si64(
*(int*)p_v);
\
mm0 = (__m64)*(uint64_t*)p_y1; \
mm3 = (__m64)*(uint64_t*)p_y2; \
mm1 = _mm_unpacklo_pi8(mm1, mm2); \
mm2 = mm1; \
mm2 = _mm_unpacklo_pi8(mm2, mm0); \
*(uin
64_t)p_line1 = (uint64)mm2;
\
*(uin
t64_t*)p_line1 = (uint64_t)mm2;
\
mm2 = mm1; \
mm2 = _mm_unpackhi_pi8(mm2, mm0); \
*(uin
64_t)(p_line1 + 4) = (uint64)mm2;
\
*(uin
t64_t*)(p_line1+8) = (uint64_t)mm2;
\
mm4 = mm1; \
mm4 = _mm_unpacklo_pi8(mm4, mm3); \
*(uin
64_t)p_line2 = (uint64)mm4;
\
*(uin
t64_t*)p_line2 = (uint64_t)mm4;
\
mm1 = _mm_unpackhi_pi8(mm1, mm3); \
*(uin
64_t)(p_line2 + 4) = (uint64
)mm1;
*(uin
t64_t*)(p_line2+8) = (uint64_t
)mm1;
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment