Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
9ab4320a
Commit
9ab4320a
authored
Mar 20, 2001
by
Christophe Massiot
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* Added IDCT Altivec optimization [MacOS X port]. Untested, not compiled.
parent
63f29665
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
426 additions
and
1 deletion
+426
-1
Makefile.in
Makefile.in
+13
-1
plugins/idct/idctaltivec.c
plugins/idct/idctaltivec.c
+175
-0
plugins/idct/idctaltivec.h
plugins/idct/idctaltivec.h
+238
-0
No files found.
Makefile.in
View file @
9ab4320a
...
...
@@ -368,6 +368,9 @@ PLUGIN_IDCTMMX = plugins/idct/idctmmx.o \
PLUGIN_IDCTMMXEXT
=
plugins/idct/idctmmxext.o
\
plugins/idct/idct_common.o
PLUGIN_IDCTALTIVEC
=
plugins/idct/idctaltivec.o
\
plugins/idct/idct_common.o
PLUGIN_MACOSX
=
plugins/macosx/macosx.o
\
plugins/macosx/intf_macosx.o
\
plugins/macosx/vout_macosx.o
...
...
@@ -451,7 +454,8 @@ NONSTD_PLUGIN_OBJ = \
$(PLUGIN_X11)
\
$(PLUGIN_GLIDE)
\
$(PLUGIN_GTK)
\
$(PLUGIN_GNOME)
$(PLUGIN_GNOME)
\
$(PLUGIN_IDCT_ALTIVEC)
NONSTD_CPP_PLUGIN_OBJ
=
\
$(PLUGIN_BEOS)
\
...
...
@@ -628,6 +632,11 @@ $(PLUGIN_BEOS): %.o: .dep/%.dpp
$(PLUGIN_BEOS)
:
%.o: %.cpp
$(CC)
$(CFLAGS)
$(PCFLAGS)
-c
-o
$@
$<
$(PLUGIN_IDCTALTIVEC)
:
%.o: Makefile.dep
$(PLUGIN_IDCTALTIVEC)
:
%.o: .dep/%.d
$(PLUGIN_IDCTALTIVEC)
:
%.o: %.c
$(CC)
$(CFLAGS)
$(PCFLAGS)
-c
-o
$@
$<
-faltivec
#
# Main application target
#
...
...
@@ -793,6 +802,9 @@ lib/idctmmx.so: $(PLUGIN_IDCTMMX)
lib/idctmmxext.so
:
$(PLUGIN_IDCTMMXEXT)
$(CC)
$(PCFLAGS)
$(PLCFLAGS)
-o
$@
$^
lib/idctaltivec.so
:
$(PLUGIN_IDCTALTIVEC)
$(CC)
$(PCFLAGS)
$(PLCFLAGS)
-o
$@
$^
-framework
VecLib
endif
################################################################################
...
...
plugins/idct/idctaltivec.c
0 → 100644
View file @
9ab4320a
/*****************************************************************************
* idctaltivec.c : Altivec IDCT module
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
* $Id: idctaltivec.c,v 1.1 2001/03/20 20:09:37 massiot Exp $
*
* Authors: Christophe Massiot <massiot@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/* Identifier of this plugin. It has to name the Altivec module itself:
 * the previous value, idctmmxext, is the name of a different IDCT plugin
 * (see PLUGIN_IDCTMMXEXT in Makefile.in), whereas this file is built into
 * idctaltivec.so and idct_Probe() tests for the method "idctaltivec". */
#define MODULE_NAME idctaltivec
/*****************************************************************************
* Preamble
*****************************************************************************/
#include "defs.h"
#include <stdlib.h>
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"
/* TestCPU() */
#include "video.h"
#include "video_output.h"
#include "video_decoder.h"
#include "modules.h"
#include "modules_inner.h"
#include "idct.h"
#include "idctaltivec.h"
/*****************************************************************************
* Local prototypes.
*****************************************************************************/
/* Fills p_function_list with the probe and IDCT entry points of this module. */
static void idct_getfunctions( function_list_t * p_function_list );
/* Returns the preference score of this module (0 when Altivec is absent). */
static int  idct_Probe( probedata_t *p_data );
/* Scan-table hook installed as pf_norm_scan; currently an empty function. */
static void vdec_NormScan( u8 ppi_scan[2][64] );
/*****************************************************************************
* Build configuration tree.
*****************************************************************************/
/* Configuration tree of the module: one window containing a single comment
 * entry, i.e. no configurable option is declared here. */
MODULE_CONFIG_START
ADD_WINDOW( "Configuration for Altivec IDCT module" )
    ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
MODULE_CONFIG_END
/*****************************************************************************
 * InitModule: describe the module to the main application.
 *****************************************************************************
 * Fills in the short name, long name, version string and capability mask of
 * p_module, then reports success. The strings are expected to be strdup()ed
 * by the main application afterwards, so that they stay readable once the
 * module has been unloaded to save memory.
 *****************************************************************************/
MODULE_INIT
{
    /* This module only provides an IDCT implementation. */
    p_module->i_capabilities = MODULE_CAPABILITY_NULL
                                | MODULE_CAPABILITY_IDCT;

    /* Identification strings. */
    p_module->psz_version  = VERSION;
    p_module->psz_longname = "Altivec IDCT module";
    p_module->psz_name     = MODULE_STRING;

    return 0;
}
/*****************************************************************************
 * ActivateModule: bring the module to a usable state.
 *****************************************************************************
 * Allocates the function table of p_module, fills its IDCT entry through
 * idct_getfunctions() and attaches the configuration tree. Returns 0 on
 * success and -1 when the table cannot be allocated. After a successful
 * call, i_usage can be set to 0 and incremented through NeedModule(); to
 * unload the module, wait until i_usage == 0 and call DeactivateModule().
 *****************************************************************************/
MODULE_ACTIVATE
{
    p_module->p_functions = malloc( sizeof( module_functions_t ) );
    if( !p_module->p_functions )
    {
        /* Out of memory: nothing else has been set up yet. */
        return -1;
    }

    /* Publish the IDCT capability functions. */
    idct_getfunctions( &p_module->p_functions->idct );

    /* Attach the configuration tree built at the top of this file. */
    p_module->p_config = p_config;

    return 0;
}
/*****************************************************************************
 * DeactivateModule: make sure the module can be unloaded.
 *****************************************************************************
 * Releases the function table allocated by ActivateModule() and always
 * returns 0. This function must only be called when i_usage == 0. If it
 * successfully returns, i_usage can be set to -1 and the module unloaded.
 * Be careful to lock usage_lock during the whole process.
 *****************************************************************************/
MODULE_DEACTIVATE
{
    free( p_module->p_functions );

    /* Do not leave a dangling pointer behind: the module descriptor is still
     * reachable after this call, and a stale p_functions would turn a later
     * access or a second deactivation into a use-after-free / double free. */
    p_module->p_functions = NULL;

    return 0;
}
/* Following functions are local */
/*****************************************************************************
 * idct_getfunctions: publish the functions exported as capabilities.
 *****************************************************************************
 * Stores the probe function and the four IDCT entry points (init, sparse
 * IDCT, full IDCT and scan normalization) into p_function_list.
 *****************************************************************************/
static void idct_getfunctions( function_list_t * p_function_list )
{
    /* IDCT capability entry points. */
    p_function_list->functions.idct.pf_norm_scan   = vdec_NormScan;
    p_function_list->functions.idct.pf_idct        = vdec_IDCT;
    p_function_list->functions.idct.pf_sparse_idct = vdec_SparseIDCT;
    p_function_list->functions.idct.pf_init        = vdec_InitIDCT;

    /* Scoring function used to choose between modules. */
    p_function_list->pf_probe = idct_Probe;
}
/*****************************************************************************
 * idct_Probe: return a preference score
 *****************************************************************************
 * Returns 0 when the CPU does not report the Altivec capability, 999 when
 * it does and the IDCT method variable selects "idctaltivec", and 200 when
 * Altivec is available but this method was not selected. p_data is not read.
 *****************************************************************************/
static int idct_Probe( probedata_t *p_data )
{
    /* Without Altivec this module cannot be used at all. */
    if( !TestCPU( CPU_CAPABILITY_ALTIVEC ) )
    {
        return 0;
    }

    /* An explicit request for this method outranks the default score. */
    return TestMethod( IDCT_METHOD_VAR, "idctaltivec" ) ? 999 : 200;
}
/*****************************************************************************
 * vdec_NormScan : scan-table normalization hook (currently does nothing)
 *****************************************************************************
 * Installed as pf_norm_scan by idct_getfunctions(). The body is empty, so
 * ppi_scan is left unmodified. The original note here read "Soon, transpose":
 * a transposition of the scan tables was planned but is not implemented.
 * NOTE(review): IDCT() in idctaltivec.h transposes the block once between
 * its two passes and never transposes it back; presumably the planned
 * scan-table transpose is meant to compensate for that -- confirm.
 *****************************************************************************/
static void vdec_NormScan( u8 ppi_scan[2][64] )
{
}
/*****************************************************************************
 * vdec_IDCT : in-place IDCT of one block, delegated to IDCT()
 *****************************************************************************
 * p_block is handed to IDCT() as both input and output, so the coefficients
 * are overwritten by the result. p_vdec and i_idontcare are not used.
 * NOTE(review): IDCT() takes short pointers and documents a 128-bit
 * alignment requirement on its buffers -- confirm that dctelem_t blocks
 * satisfy both.
 *****************************************************************************/
void vdec_IDCT( vdec_thread_t * p_vdec, dctelem_t * p_block, int i_idontcare )
{
    /* Same buffer for source and destination: transform in place. */
    IDCT( p_block, p_block );
}
plugins/idct/idctaltivec.h
0 → 100644
View file @
9ab4320a
/***************************************************************
 *
 * Copyright:   (c) Copyright Motorola Inc. 1998
 *
 * Date:        April 17, 1998
 *
 * Function:    Matrix_Transpose
 *
 * Description: Full transpose of an 8x8 matrix of 16-bit
 *              elements, adapted from an algorithm developed
 *              by Brett Olsson from IBM. The transpose is done
 *              in three interleaving passes; each pass merges
 *              vector k with vector k+4 of the previous pass,
 *              once on the high halves (vec_mergeh) and once
 *              on the low halves (vec_mergel).
 *
 * Inputs:      input  - eight row vectors
 *                input[0] = [ 00 01 02 03 04 05 06 07 ]
 *                input[1] = [ 10 11 12 13 14 15 16 17 ]
 *                input[2] = [ 20 21 22 23 24 25 26 27 ]
 *                input[3] = [ 30 31 32 33 34 35 36 37 ]
 *                input[4] = [ 40 41 42 43 44 45 46 47 ]
 *                input[5] = [ 50 51 52 53 54 55 56 57 ]
 *                input[6] = [ 60 61 62 63 64 65 66 67 ]
 *                input[7] = [ 70 71 72 73 74 75 76 77 ]
 *
 * Outputs:     output - eight vectors, output[k] holding
 *                       column k of the input matrix
 *
 **************************************************************/
static __inline__ void Matrix_Transpose( vector signed short *input,
                                         vector signed short *output )
{
    /* Pass 1: interleave row k with row k+4. */
    vector signed short p0 = vec_mergeh( input[0], input[4] ); /* [ 00 40 01 41 02 42 03 43 ] */
    vector signed short p1 = vec_mergel( input[0], input[4] ); /* [ 04 44 05 45 06 46 07 47 ] */
    vector signed short p2 = vec_mergeh( input[1], input[5] ); /* [ 10 50 11 51 12 52 13 53 ] */
    vector signed short p3 = vec_mergel( input[1], input[5] ); /* [ 14 54 15 55 16 56 17 57 ] */
    vector signed short p4 = vec_mergeh( input[2], input[6] ); /* [ 20 60 21 61 22 62 23 63 ] */
    vector signed short p5 = vec_mergel( input[2], input[6] ); /* [ 24 64 25 65 26 66 27 67 ] */
    vector signed short p6 = vec_mergeh( input[3], input[7] ); /* [ 30 70 31 71 32 72 33 73 ] */
    vector signed short p7 = vec_mergel( input[3], input[7] ); /* [ 34 74 35 75 36 76 37 77 ] */

    /* Pass 2: interleave pass-1 vector k with pass-1 vector k+4. */
    vector signed short q0 = vec_mergeh( p0, p4 );  /* [ 00 20 40 60 01 21 41 61 ] */
    vector signed short q1 = vec_mergel( p0, p4 );  /* [ 02 22 42 62 03 23 43 63 ] */
    vector signed short q2 = vec_mergeh( p1, p5 );  /* [ 04 24 44 64 05 25 45 65 ] */
    vector signed short q3 = vec_mergel( p1, p5 );  /* [ 06 26 46 66 07 27 47 67 ] */
    vector signed short q4 = vec_mergeh( p2, p6 );  /* [ 10 30 50 70 11 31 51 71 ] */
    vector signed short q5 = vec_mergel( p2, p6 );  /* [ 12 32 52 72 13 33 53 73 ] */
    vector signed short q6 = vec_mergeh( p3, p7 );  /* [ 14 34 54 74 15 35 55 75 ] */
    vector signed short q7 = vec_mergel( p3, p7 );  /* [ 16 36 56 76 17 37 57 77 ] */

    /* Pass 3: the last interleave yields the columns. Nothing is written to
     * output before every input vector has been consumed by pass 1. */
    output[0] = vec_mergeh( q0, q4 );               /* [ 00 10 20 30 40 50 60 70 ] */
    output[1] = vec_mergel( q0, q4 );               /* [ 01 11 21 31 41 51 61 71 ] */
    output[2] = vec_mergeh( q1, q5 );               /* [ 02 12 22 32 42 52 62 72 ] */
    output[3] = vec_mergel( q1, q5 );               /* [ 03 13 23 33 43 53 63 73 ] */
    output[4] = vec_mergeh( q2, q6 );               /* [ 04 14 24 34 44 54 64 74 ] */
    output[5] = vec_mergel( q2, q6 );               /* [ 05 15 25 35 45 55 65 75 ] */
    output[6] = vec_mergeh( q3, q7 );               /* [ 06 16 26 36 46 56 66 76 ] */
    output[7] = vec_mergel( q3, q7 );               /* [ 07 17 27 37 47 57 67 77 ] */
}
/***************************************************************
 *
 * Copyright:   (c) Copyright Motorola Inc. 1998
 *
 * Date:        April 20, 1998
 *
 * Macro:       IDCT_Transform
 *
 * Description: One pass of the Inverse Discrete Cosine Transform,
 *              implemented by the Scaled Chen (III) Algorithm
 *              developed by Haifa Research Lab. The major
 *              difference between this algorithm and the Scaled
 *              Chen (I) is that certain multiply-subtracts are
 *              replaced by multiply-adds. A full description of
 *              the Scaled Chen (I) algorithm can be found in:
 *              W.H.Chen, C.H.Smith and S.C.Fralick, "A Fast
 *              Computational Algorithm for the Discrete Cosine
 *              Transform", IEEE Transactions on Communications,
 *              Vol. COM-25, No. 9, pp 1004-1009, Sept. 1977.
 *
 * Usage:       The macro expands to a plain sequence of
 *              statements (no enclosing braces or do/while), so
 *              it must be used as a statement of its own inside
 *              a block. It reads and writes the following names
 *              from the caller's scope:
 *
 * Inputs:      vx     : array of vector short
 *              t0-t9  : temporary vector variables set up by caller
 *                       (several are reused between stages, so the
 *                       statement order below is significant)
 *              c4     : cos(4*pi/16)
 *              mc4    : -c4
 *              a0     : c6/c2
 *              a1     : c7/c1
 *              a2     : c5/c3
 *              ma2    : -a2
 *              zero   : a vector of zero elements
 *
 * Outputs:     vy     : array of vector short
 *
 **************************************************************/
#define IDCT_Transform(vx,vy) \
                                                                  \
  /* 1st stage. */                                                \
  t9 = vec_mradds( a1, vx[1], zero );  /* t8 = (a1) * x1 - x7  */ \
  t8 = vec_subs( t9, vx[7]);                                      \
  t1 = vec_mradds( a1, vx[7], vx[1] ); /* t1 = (a1) * x7 + x1  */ \
  t7 = vec_mradds( a2, vx[5], vx[3] ); /* t7 = (a2) * x5 + x3  */ \
  t3 = vec_mradds( ma2, vx[3], vx[5] );/* t3 = (-a2) * x3 + x5 */ \
                                                                  \
  /* 2nd stage */                                                 \
  t5 = vec_adds( vx[0], vx[4] );       /* t5 = x0 + x4         */ \
  t0 = vec_subs( vx[0], vx[4] );       /* t0 = x0 - x4         */ \
  t9 = vec_mradds( a0, vx[2], zero );  /* t4 = (a0) * x2 - x6  */ \
  t4 = vec_subs( t9, vx[6] );                                     \
  t2 = vec_mradds( a0, vx[6], vx[2] ); /* t2 = (a0) * x6 + x2  */ \
                                                                  \
  t6 = vec_adds( t8, t3 );             /* t6 = t8 + t3         */ \
  t3 = vec_subs( t8, t3 );             /* t3 = t8 - t3         */ \
  t8 = vec_subs( t1, t7 );             /* t8 = t1 - t7         */ \
  t1 = vec_adds( t1, t7 );             /* t1 = t1 + t7         */ \
                                                                  \
  /* 3rd stage. */                                                \
  t7 = vec_adds( t5, t2 );             /* t7 = t5 + t2         */ \
  t2 = vec_subs( t5, t2 );             /* t2 = t5 - t2         */ \
  t5 = vec_adds( t0, t4 );             /* t5 = t0 + t4         */ \
  t0 = vec_subs( t0, t4 );             /* t0 = t0 - t4         */ \
                                                                  \
  t4 = vec_subs( t8, t3 );             /* t4 = t8 - t3         */ \
  t3 = vec_adds( t8, t3 );             /* t3 = t8 + t3         */ \
                                                                  \
  /* 4th stage. */                                                \
  vy[0] = vec_adds( t7, t1 );          /* y0 = t7 + t1         */ \
  vy[7] = vec_subs( t7, t1 );          /* y7 = t7 - t1         */ \
  vy[1] = vec_mradds( c4, t3, t5 );    /* y1 = (c4) * t3 + t5  */ \
  vy[6] = vec_mradds( mc4, t3, t5 );   /* y6 = (-c4) * t3 + t5 */ \
  vy[2] = vec_mradds( c4, t4, t0 );    /* y2 = (c4) * t4 + t0  */ \
  vy[5] = vec_mradds( mc4, t4, t0 );   /* y5 = (-c4) * t4 + t0 */ \
  vy[3] = vec_adds( t2, t6 );          /* y3 = t2 + t6         */ \
  vy[4] = vec_subs( t2, t6 );          /* y4 = t2 - t6         */
/* Pre-Scaling matrix -- scaled by 1
 *
 * Row i of this table is multiplied element-wise into row i of the input
 * block by IDCT() (through vec_mradds) before the first transform pass.
 * The table is symmetric, and rows 4, 5, 6 and 7 repeat rows 0, 3, 2 and 1.
 * NOTE(review): the entries look like products s[i] * s[j] * 4096 of
 * per-row / per-column scale factors of the scaled Chen algorithm --
 * confirm against the original Motorola material before changing any value.
 */
static vector signed short PreScale[8] =
{
    (vector signed short)( 4095, 5681, 5351, 4816, 4095, 4816, 5351, 5681 ),
    (vector signed short)( 5681, 7880, 7422, 6680, 5681, 6680, 7422, 7880 ),
    (vector signed short)( 5351, 7422, 6992, 6292, 5351, 6292, 6992, 7422 ),
    (vector signed short)( 4816, 6680, 6292, 5663, 4816, 5663, 6292, 6680 ),
    (vector signed short)( 4095, 5681, 5351, 4816, 4095, 4816, 5351, 5681 ),
    (vector signed short)( 4816, 6680, 6292, 5663, 4816, 5663, 6292, 6680 ),
    (vector signed short)( 5351, 7422, 6992, 6292, 5351, 6292, 6992, 7422 ),
    (vector signed short)( 5681, 7880, 7422, 6680, 5681, 6680, 7422, 7880 )
};
/***************************************************************
 *
 * Copyright:   (c) Copyright Motorola Inc. 1998
 *
 * Date:        April 17, 1998
 *
 * Function:    IDCT
 *
 * Description: Scaled Chen (III) algorithm for IDCT
 *              Arithmetic is 16-bit fixed point.
 *              The block is pre-scaled with PreScale[], run
 *              through IDCT_Transform, transposed with
 *              Matrix_Transpose, run through IDCT_Transform a
 *              second time and stored.
 *
 * Inputs:      input - Pointer to input data (short), which
 *                      must be between -2048 to +2047.
 *                      It is assumed that the allocated array
 *                      has been 128-bit aligned and contains
 *                      8x8 short elements.
 *
 * Outputs:     output - Pointer to output area for the transformed
 *                       data. The output values are between -255
 *                       and 255 . It is assumed that a 128-bit
 *                       aligned 8x8 array of short has been
 *                       pre-allocated.
 *                       input and output may be the same buffer:
 *                       all eight input rows are loaded into vx[]
 *                       before anything is stored (vdec_IDCT()
 *                       calls this function in place).
 *
 * Return:      None
 *
 ***************************************************************/
static __inline__ void IDCT( short *input, short *output )
{
    /* t0-t9, a0, a1, a2, ma2, c4, mc4 and zero are referenced by name from
     * inside the IDCT_Transform macro: do not rename or remove them. */
    vector signed short t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    vector signed short a0, a1, a2, ma2, c4, mc4, zero;
    vector signed short vx[8], vy[8];
    vector signed short *vec_ptr;  /* used for conversion between
                                      arrays of short and vector
                                      signed short array.  */

    /* Load the multiplication constants.  Note: these constants
     * could all be loaded directly ( like zero case ), but using the
     * SpecialConstants approach causes vsplth instructions to be
     * generated instead of lvx which is more efficient given the remainder
     * of the instruction mix.
     * The numeric values are the quantities named in the comments below
     * multiplied by 32768 (e.g. 23170 ~= cos(pi/4) * 32768); the last two
     * elements are unused padding.
     */
    vector signed short SpecialConstants =
        (vector signed short)( 23170, 13573, 6518, 21895, -23170, -21895, 0, 0 );

    c4   = vec_splat( SpecialConstants, 0 );  /* c4 = cos(4*pi/16)  */
    a0   = vec_splat( SpecialConstants, 1 );  /* a0 = c6/c2         */
    a1   = vec_splat( SpecialConstants, 2 );  /* a1 = c7/c1         */
    a2   = vec_splat( SpecialConstants, 3 );  /* a2 = c5/c3         */
    mc4  = vec_splat( SpecialConstants, 4 );  /* -c4                */
    ma2  = vec_splat( SpecialConstants, 5 );  /* -a2                */
    zero = (vector signed short)(0);

    /* Load the rows of input data and Pre-Scale them. */
    vec_ptr = ( vector signed short * ) input;
    vx[0] = vec_mradds( vec_ptr[0], PreScale[0], zero );
    vx[1] = vec_mradds( vec_ptr[1], PreScale[1], zero );
    vx[2] = vec_mradds( vec_ptr[2], PreScale[2], zero );
    vx[3] = vec_mradds( vec_ptr[3], PreScale[3], zero );
    vx[4] = vec_mradds( vec_ptr[4], PreScale[4], zero );
    vx[5] = vec_mradds( vec_ptr[5], PreScale[5], zero );
    vx[6] = vec_mradds( vec_ptr[6], PreScale[6], zero );
    vx[7] = vec_mradds( vec_ptr[7], PreScale[7], zero );

    /* Perform IDCT first on the 8 columns */
    IDCT_Transform( vx, vy );

    /* Transpose matrix to work on rows */
    Matrix_Transpose( vy, vx );

    /* Perform IDCT next on the 8 rows */
    IDCT_Transform( vx, vy );

    /* Store result. The vectors are copied out unchanged: no arithmetic
     * post-scaling is applied here, and the block is not transposed back
     * after the second pass.
     * NOTE(review): confirm that callers expect this orientation (see the
     * empty vdec_NormScan() in idctaltivec.c). */
    vec_ptr = ( vector signed short * ) output;
    vec_ptr[0] = vy[0];
    vec_ptr[1] = vy[1];
    vec_ptr[2] = vy[2];
    vec_ptr[3] = vy[3];
    vec_ptr[4] = vy[4];
    vec_ptr[5] = vy[5];
    vec_ptr[6] = vy[6];
    vec_ptr[7] = vy[7];
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment