Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-gpu
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-gpu
Commits
5e4dc54c
Commit
5e4dc54c
authored
Aug 02, 2007
by
Damien Fouilleul
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
chromas: more SSE2/MMX fixes, added I420_RGBA conversion
parent
c23c9ae9
Changes
5
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
575 additions
and
171 deletions
+575
-171
modules/video_chroma/i420_rgb.c
modules/video_chroma/i420_rgb.c
+1
-2
modules/video_chroma/i420_rgb.h
modules/video_chroma/i420_rgb.h
+1
-0
modules/video_chroma/i420_rgb16.c
modules/video_chroma/i420_rgb16.c
+239
-0
modules/video_chroma/i420_rgb_mmx.h
modules/video_chroma/i420_rgb_mmx.h
+316
-151
modules/video_chroma/i420_yuy2.h
modules/video_chroma/i420_yuy2.h
+18
-18
No files found.
modules/video_chroma/i420_rgb.c
View file @
5e4dc54c
...
...
@@ -161,8 +161,7 @@ static int Activate( vlc_object_t *p_this )
{
/* R8G8B8A8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is R8G8B8A8"
);
//p_vout->chroma.pf_convert = E_(I420_B8G8R8A8);
return
-
1
;
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_R8G8B8A8
);
}
else
if
(
p_vout
->
output
.
i_rmask
==
0x0000ff00
&&
p_vout
->
output
.
i_gmask
==
0x00ff0000
...
...
modules/video_chroma/i420_rgb.h
View file @
5e4dc54c
...
...
@@ -64,6 +64,7 @@ void E_(I420_RGB32) ( vout_thread_t *, picture_t *, picture_t * );
/*
 * Prototypes of the I420 conversion entry points, one per destination pixel
 * layout (the layout is spelled out in each identifier, e.g. R5G6B5 or
 * R8G8B8A8). All of them share the same three-argument signature; the
 * definition of I420_R8G8B8A8 in i420_rgb16.c names the arguments p_vout,
 * p_src (the picture whose Y/U/V planes are read) and p_dest (the picture
 * whose pixels are written).
 */
void
E_
(
I420_R5G5B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_R5G6B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8R8G8B8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_R8G8B8A8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_B8G8R8A8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8B8G8R8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#endif
...
...
modules/video_chroma/i420_rgb16.c
View file @
5e4dc54c
...
...
@@ -1140,6 +1140,245 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
#endif
}
/*****************************************************************************
 * I420_R8G8B8A8: convert the Y/U/V planes of p_src into 32-bit pixels
 *                written to p_dest->p, using the *_UNPACK_32_RGBA macros.
 *****************************************************************************
 * p_vout  : supplies render/output dimensions plus the scratch conversion
 *           buffer and offset table stored in chroma.p_sys.
 * p_src   : source picture; its Y_PIXELS, U_PIXELS and V_PIXELS are read.
 * p_dest  : destination picture; p_dest->p->p_pixels is written.
 *
 * Two implementations are selected at compile time: an SSE2 one handling
 * 16 pixels per step (MODULE_NAME_IS_i420_rgb_sse2) and an MMX one handling
 * 8 pixels per step. When the render width is not a multiple of the step,
 * the last block of each line is redone from a rewound position so that the
 * line ends exactly on a block boundary.
 *
 * NOTE(review): SCALE_WIDTH, SCALE_HEIGHT, SSE2_CALL and MMX_CALL are macros
 * defined elsewhere; they presumably reference the locals declared below by
 * name (p_pic_start, p_offset, i_scale_count, i_chroma_width,
 * i_right_margin, ...), so none of these may be renamed or removed even
 * though some look unused here - confirm against the macro definitions.
 *****************************************************************************/
void E_(I420_R8G8B8A8)( vout_thread_t *p_vout, picture_t *p_src,
                                            picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    vlc_bool_t   b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */
    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
    int *       p_offset;

    /* Bytes to skip at the end of each source line (pitch minus visible) */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_vout->render.i_width, p_vout->render.i_height,
               p_vout->output.i_width, p_vout->output.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_vout->output.i_height : p_vout->render.i_height;

#if defined (MODULE_NAME_IS_i420_rgb_sse2)

    /* Number of pixels by which the last 16-pixel block of a line must be
     * moved back so that it ends exactly at the end of the line */
    if( p_vout->render.i_width & 15 )
    {
        i_rewind = 16 - ( p_vout->render.i_width & 15 );
    }
    else
    {
        i_rewind = 0;
    }

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */

    /* When scaling horizontally, convert into the scratch buffer first;
     * otherwise convert straight into the destination picture */
    p_buffer = b_hscale ? p_buffer_start : p_pic;

    /* Pointers are converted through intptr_t rather than int so that the
     * cast is well defined where pointers are wider than int; only the low
     * four bits matter for the 16-byte alignment test. */
    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_vout->render.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                /* This tail step handles a full 16-pixel block, exactly
                 * like one iteration of the loop above, so the chroma
                 * pointers must advance by 8 (not 4) to end up at the end
                 * of the chroma line, as in the unaligned branch below. */
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                /* Skip the chroma line padding on odd luma lines only */
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_vout->render.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                /* Skip the chroma line padding on odd luma lines only */
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter */
    SSE2_END;

#else // defined (MODULE_NAME_IS_i420_rgb_mmx)

    /* Number of pixels by which the last 8-pixel block of a line must be
     * moved back so that it ends exactly at the end of the line */
    if( p_vout->render.i_width & 7 )
    {
        i_rewind = 8 - ( p_vout->render.i_width & 7 );
    }
    else
    {
        i_rewind = 0;
    }

    for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_vout->render.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_RGBA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_RGBA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            /* Skip the chroma line padding on odd luma lines only */
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
void
E_
(
I420_B8G8R8A8
)(
vout_thread_t
*
p_vout
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
...
...
modules/video_chroma/i420_rgb_mmx.h
View file @
5e4dc54c
This diff is collapsed.
Click to expand it.
modules/video_chroma/i420_yuy2.h
View file @
5e4dc54c
...
...
@@ -138,56 +138,56 @@ movq %%mm1, (%1) # Store YUYV \n\
#define MMX_END _mm_empty()
#define MMX_YUV420_YUYV \
mm1 = _mm_cvtsi32_si64(
(int)*p_u);
\
mm2 = _mm_cvtsi32_si64(
(int)*p_v);
\
mm1 = _mm_cvtsi32_si64(
*(int*)p_u);
\
mm2 = _mm_cvtsi32_si64(
*(int*)p_v);
\
mm0 = (__m64)*(uint64_t*)p_y1; \
mm3 = (__m64)*(uint64_t*)p_y2; \
mm1 = _mm_unpacklo_pi8(mm1, mm2); \
mm2 = mm0; \
mm2 = _mm_unpacklo_pi8(mm2, mm1); \
*(uin
64_t)p_line1 = (uint64)mm2;
\
*(uin
t64_t*)p_line1 = (uint64_t)mm2;
\
mm0 = _mm_unpackhi_pi8(mm0, mm1); \
*(uin
64_t)(p_line1 + 4) = (uint64)mm0;
\
*(uin
t64_t*)(p_line1+8) = (uint64_t)mm0;
\
mm4 = mm3; \
mm4 = _mm_unpacklo_pi8(mm4, mm1); \
*(uin
64_t)p_line2 = (uint64)mm4;
\
*(uin
t64_t*)p_line2 = (uint64_t)mm4;
\
mm3 = _mm_unpackhi_pi8(mm3, mm1); \
*(uin
64_t)(p_line2 + 4) = (uint64)mm4
;
*(uin
t64_t*)(p_line2+8) = (uint64_t)mm3
;
#define MMX_YUV420_YVYU \
mm2 = _mm_cvtsi32_si64(
(int)*p_u);
\
mm1 = _mm_cvtsi32_si64(
(int)*p_v);
\
mm2 = _mm_cvtsi32_si64(
*(int*)p_u);
\
mm1 = _mm_cvtsi32_si64(
*(int*)p_v);
\
mm0 = (__m64)*(uint64_t*)p_y1; \
mm3 = (__m64)*(uint64_t*)p_y2; \
mm1 = _mm_unpacklo_pi8(mm1, mm2); \
mm2 = mm0; \
mm2 = _mm_unpacklo_pi8(mm2, mm1); \
*(uin
64_t)p_line1 = (uint64)mm2;
\
*(uin
t64_t*)p_line1 = (uint64_t)mm2;
\
mm0 = _mm_unpackhi_pi8(mm0, mm1); \
*(uin
64_t)(p_line1 + 4) = (uint64)mm0;
\
*(uin
t64_t*)(p_line1+8) = (uint64_t)mm0;
\
mm4 = mm3; \
mm4 = _mm_unpacklo_pi8(mm4, mm1); \
*(uin
64_t)p_line2 = (uint64)mm4;
\
*(uin
t64_t*)p_line2 = (uint64_t)mm4;
\
mm3 = _mm_unpackhi_pi8(mm3, mm1); \
*(uin
64_t)(p_line2 + 4) = (uint64)mm4
;
*(uin
t64_t*)(p_line2+8) = (uint64_t)mm3
;
#define MMX_YUV420_UYVY \
mm1 = _mm_cvtsi32_si64(
(int)*p_u);
\
mm2 = _mm_cvtsi32_si64(
(int)*p_v);
\
mm1 = _mm_cvtsi32_si64(
*(int*)p_u);
\
mm2 = _mm_cvtsi32_si64(
*(int*)p_v);
\
mm0 = (__m64)*(uint64_t*)p_y1; \
mm3 = (__m64)*(uint64_t*)p_y2; \
mm1 = _mm_unpacklo_pi8(mm1, mm2); \
mm2 = mm1; \
mm2 = _mm_unpacklo_pi8(mm2, mm0); \
*(uin
64_t)p_line1 = (uint64)mm2;
\
*(uin
t64_t*)p_line1 = (uint64_t)mm2;
\
mm2 = mm1; \
mm2 = _mm_unpackhi_pi8(mm2, mm0); \
*(uin
64_t)(p_line1 + 4) = (uint64)mm2;
\
*(uin
t64_t*)(p_line1+8) = (uint64_t)mm2;
\
mm4 = mm1; \
mm4 = _mm_unpacklo_pi8(mm4, mm3); \
*(uin
64_t)p_line2 = (uint64)mm4;
\
*(uin
t64_t*)p_line2 = (uint64_t)mm4;
\
mm1 = _mm_unpackhi_pi8(mm1, mm3); \
*(uin
64_t)(p_line2 + 4) = (uint64
)mm1;
*(uin
t64_t*)(p_line2+8) = (uint64_t
)mm1;
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment