Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-2-2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-2-2
Commits
313acca3
Commit
313acca3
authored
Oct 22, 2004
by
Gildas Bazin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* modules/codec/ffmpeg/postprocessing: removed deprecated postprocessing code.
parent
b15eda88
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
0 additions
and
2783 deletions
+0
-2783
configure.ac
configure.ac
+0
-1
modules/codec/ffmpeg/postprocessing/Modules.am
modules/codec/ffmpeg/postprocessing/Modules.am
+0
-20
modules/codec/ffmpeg/postprocessing/postprocessing.c
modules/codec/ffmpeg/postprocessing/postprocessing.c
+0
-223
modules/codec/ffmpeg/postprocessing/postprocessing.h
modules/codec/ffmpeg/postprocessing/postprocessing.h
+0
-56
modules/codec/ffmpeg/postprocessing/postprocessing_c.c
modules/codec/ffmpeg/postprocessing/postprocessing_c.c
+0
-625
modules/codec/ffmpeg/postprocessing/postprocessing_common.h
modules/codec/ffmpeg/postprocessing/postprocessing_common.h
+0
-52
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
+0
-889
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
+0
-917
No files found.
configure.ac
View file @
313acca3
...
...
@@ -4138,7 +4138,6 @@ AC_CONFIG_FILES([
modules/codec/cmml/Makefile
modules/codec/dmo/Makefile
modules/codec/ffmpeg/Makefile
modules/codec/ffmpeg/postprocessing/Makefile
modules/codec/ogt/Makefile
modules/codec/spudec/Makefile
modules/control/Makefile
...
...
modules/codec/ffmpeg/postprocessing/Modules.am
deleted
100644 → 0
View file @
b15eda88
# Source lists for the three post-processing module variants.
# Every variant builds the shared postprocessing.c together with its own
# filter implementation file.
SOURCES_postprocessing_c = \
postprocessing.c \
postprocessing_c.c \
$(NULL)
SOURCES_postprocessing_mmx = \
postprocessing.c \
postprocessing_mmx.c \
$(NULL)
SOURCES_postprocessing_mmxext = \
postprocessing.c \
postprocessing_mmxext.c \
$(NULL)
# Headers shared by the variants; listed so they are distributed, not installed.
noinst_HEADERS += \
postprocessing.h \
postprocessing_common.h \
$(NULL)
modules/codec/ffmpeg/postprocessing/postprocessing.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing.c
*****************************************************************************
* Copyright (C) 1999-2001 VideoLAN
* $Id$
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#include <stdlib.h>
#include <string.h>
#include <vlc/vlc.h>
#include <vlc/vout.h>
#include "postprocessing.h"
#include "postprocessing_common.h"
/* Forward declarations: the module activation callback and the two
 * functions it installs into postprocessing_t. */
static int      Open          ( vlc_object_t *p_this );

static uint32_t pp_getmode    ( int i_quality, int b_autolevel );
static int      pp_postprocess( picture_t *p_pic,
                                QT_STORE_T *p_QP_store,
                                unsigned int i_QP_stride,
                                unsigned int i_mode );
/*****************************************************************************
* Module descriptor
*****************************************************************************/
/* One descriptor is emitted per build variant; the variant is selected by the
 * MODULE_NAME_IS_* macro defined for the translation unit.  All variants
 * provide the "postprocessing" capability, with a larger numeric argument for
 * the more specialised builds (presumably the module score -- confirm against
 * the VLC module API). */
vlc_module_begin();
#if defined( MODULE_NAME_IS_postprocessing_c )
    set_description( _("C post processing") );
    set_capability( "postprocessing", 50 );
    add_shortcut( "c" );
#elif defined( MODULE_NAME_IS_postprocessing_mmx )
    set_description( _("MMX post processing") );
    set_capability( "postprocessing", 100 );
    add_requirement( MMX );
    add_shortcut( "mmx" );
#elif defined( MODULE_NAME_IS_postprocessing_mmxext )
    set_description( _("MMX EXT post processing") );
    set_capability( "postprocessing", 150 );
    add_requirement( MMXEXT );
    add_shortcut( "mmxext" );
    add_shortcut( "mmx2" );
#endif
    /* Open is the activation callback; there is no deactivation callback */
    set_callbacks( Open, NULL );
vlc_module_end();
/*****************************************************************************
* Module initializer
*****************************************************************************/
/* Open: module activation callback.
 *
 * p_this is used as a postprocessing_t; its two function pointers are set to
 * this file's implementations.  Always returns VLC_SUCCESS. */
static int Open( vlc_object_t *p_this )
{
    postprocessing_t *p_filter = (postprocessing_t *)p_this;

    p_filter->pf_getmode     = pp_getmode;
    p_filter->pf_postprocess = pp_postprocess;

    return VLC_SUCCESS;
}
/* pp_getmode: translate a quality level into a set of PP_* filter flags.
 *
 * i_quality   is clamped to [0, 6].  Level 0 enables nothing; each further
 *             level keeps the filters of the previous one and adds one more,
 *             in the order listed in pi_added below.
 * b_autolevel when non-zero, PP_AUTOLEVEL is OR-ed into the result.
 *
 * Returns the resulting flag word. */
static uint32_t pp_getmode( int i_quality, int b_autolevel )
{
    /* pi_added[n] is the single filter that level n adds on top of n - 1 */
    static const uint32_t pi_added[7] =
    {
        0,
        PP_DEBLOCK_Y_H,
        PP_DEBLOCK_Y_V,
        PP_DEBLOCK_C_H,
        PP_DEBLOCK_C_V,
        PP_DERING_Y,
        PP_DERING_C
    };
    uint32_t i_flags = 0;
    int i_level;

    if( i_quality < 0 )
    {
        i_quality = 0;
    }
    else if( i_quality > 6 )
    {
        i_quality = 6;
    }

    for( i_level = 1; i_level <= i_quality; i_level++ )
    {
        i_flags |= pi_added[i_level];
    }

    if( b_autolevel )
    {
        i_flags |= PP_AUTOLEVEL;
    }

    return i_flags;
}
/*****************************************************************************
* pp_postprocess : make post-filter as defined in MPEG4-ISO
*****************************************************************************
*****************************************************************************/
/* pp_postprocess: run the filters selected by i_mode on one picture, in place.
 *
 * p_pic        picture to filter; only pictures tagged I420 or YV12 are
 *              accepted
 * p_QP_store   quantizer table, must not be NULL
 * i_QP_stride  row length of p_QP_store, must be at least (width >> 4)
 * i_mode       OR of PP_DEBLOCK_* / PP_DERING_* flags
 *
 * Order of operations: vertical then horizontal deblocking on luma, the same
 * on both chroma planes, then deringing on luma and on chroma.
 *
 * Returns PP_ERR_OK, PP_ERR_INVALID_PICTURE or PP_ERR_INVALID_QP. */
static int pp_postprocess( picture_t *p_pic,
                           QT_STORE_T *p_QP_store,
                           unsigned int i_QP_stride,
                           unsigned int i_mode )
{
    /* Sanity checks.  (A check that both dimensions are multiples of 16 used
     * to be part of this test; it is disabled.) */
    if( !( p_pic->p_heap->i_chroma == VLC_FOURCC( 'I', '4', '2', '0' ) ||
           p_pic->p_heap->i_chroma == VLC_FOURCC( 'Y', 'V', '1', '2' ) ) )
    {
        return PP_ERR_INVALID_PICTURE;
    }

    if( p_QP_store == NULL ||
        i_QP_stride < ( p_pic->p_heap->i_width >> 4 ) )
    {
        return PP_ERR_INVALID_QP;
    }

    /* Deblocking: vertical pass first, then horizontal */

    /* Luminance */
    if( i_mode & PP_DEBLOCK_Y_V )
    {
        E_( pp_deblock_V )( p_pic->Y_PIXELS,
                            p_pic->p_heap->i_width,
                            p_pic->p_heap->i_height,
                            p_pic->Y_PITCH,
                            p_QP_store, i_QP_stride, 0 );
    }
    if( i_mode & PP_DEBLOCK_Y_H )
    {
        E_( pp_deblock_H )( p_pic->Y_PIXELS,
                            p_pic->p_heap->i_width,
                            p_pic->p_heap->i_height,
                            p_pic->Y_PITCH,
                            p_QP_store, i_QP_stride, 0 );
    }

    /* Chrominance: both planes, at half the luma dimensions */
    if( i_mode & PP_DEBLOCK_C_V )
    {
        E_( pp_deblock_V )( p_pic->U_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->U_PITCH,
                            p_QP_store, i_QP_stride, 1 );
        E_( pp_deblock_V )( p_pic->V_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->V_PITCH,
                            p_QP_store, i_QP_stride, 1 );
    }
    if( i_mode & PP_DEBLOCK_C_H )
    {
        E_( pp_deblock_H )( p_pic->U_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->U_PITCH,
                            p_QP_store, i_QP_stride, 1 );
        E_( pp_deblock_H )( p_pic->V_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->V_PITCH,
                            p_QP_store, i_QP_stride, 1 );
    }

    /* Deringing runs after deblocking */
    /* TODO check for min size */
    if( i_mode & PP_DERING_Y )
    {
        E_( pp_dering_Y )( p_pic->Y_PIXELS,
                           p_pic->p_heap->i_width,
                           p_pic->p_heap->i_height,
                           p_pic->Y_PITCH,
                           p_QP_store, i_QP_stride );
    }
    if( i_mode & PP_DERING_C )
    {
        E_( pp_dering_C )( p_pic->U_PIXELS,
                           p_pic->p_heap->i_width >> 1,
                           p_pic->p_heap->i_height >> 1,
                           p_pic->U_PITCH,
                           p_QP_store, i_QP_stride );
        E_( pp_dering_C )( p_pic->V_PIXELS,
                           p_pic->p_heap->i_width >> 1,
                           p_pic->p_heap->i_height >> 1,
                           p_pic->V_PITCH,
                           p_QP_store, i_QP_stride );
    }

#if defined( MODULE_NAME_IS_postprocessing_mmx )||defined( MODULE_NAME_IS_postprocessing_mmxext )
    /* We have used MMX so return to safe FPU state */
    __asm__ __volatile__ ( "emms" );
#endif

    return PP_ERR_OK;
}
modules/codec/ffmpeg/postprocessing/postprocessing.h
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing.h
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing.h,v 1.3 2003/10/25 00:49:14 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/* Element type of the quantizer (QP) table passed to pf_postprocess */
#define QT_STORE_T int8_t
/* Filter selection bits; OR them together to build i_mode.
 * Y = luma plane, C = chroma planes; H / V select the pp_deblock_H /
 * pp_deblock_V pass. */
#define PP_DEBLOCK_Y_H 0x00000001
#define PP_DEBLOCK_Y_V 0x00000002
#define PP_DEBLOCK_C_H 0x00000004
#define PP_DEBLOCK_C_V 0x00000008
#define PP_DERING_Y 0x00000010
#define PP_DERING_C 0x00000020
/* Added by pf_getmode when b_autolevel is non-zero */
#define PP_AUTOLEVEL 0x80000000
/* Error codes returned by pf_postprocess (not really used) */
/* no problem */
#define PP_ERR_OK 0
/* wrong picture size or chroma */
#define PP_ERR_INVALID_PICTURE 1
/* a valid QP table is needed to run the post-processing */
#define PP_ERR_INVALID_QP 2
#define PP_ERR_UNKNOWN 255
/* Object implemented by a post-processing module.  The module's activation
 * callback fills in the two function pointers. */
typedef struct postprocessing_s
{
    VLC_COMMON_MEMBERS

    module_t *p_module;

    /* Build an i_mode flag word from a quality level and an autolevel switch */
    uint32_t ( *pf_getmode )( int i_quality, int b_autolevel );

    /* Filter p_pic in place according to i_mode; returns a PP_ERR_* code */
    int ( *pf_postprocess )( picture_t *p_pic,
                             QT_STORE_T *p_QP_store,
                             unsigned int i_QP_stride,
                             unsigned int i_mode );
} postprocessing_t;
modules/codec/ffmpeg/postprocessing/postprocessing_c.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_c.c: Post Processing plugin in C
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_c.c,v 1.2 2002/12/06 16:34:05 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <vlc/vlc.h>
/* only use uint8_t, uint32_t .... */
#include "postprocessing.h"
#include "postprocessing_common.h"
/*****************************************************************************
*
* Internals functions common to pp_deblock_V and pp_deblock_H
*
*****************************************************************************/
/****************************************************************************
* pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
****************************************************************************
* Use constant PP_THR1 and PP_THR2 ( PP_2xTHR1 )
*
* Called for each pixel on a boundary block when doing deblocking
* so need to be fast ...
*
****************************************************************************/
/* Decide between DC mode and default mode for one line of 10 samples.
 *
 * Counts the adjacent pairs (p_v[i], p_v[i+1]), i = 0..8, whose difference
 * lies within [-PP_THR1, PP_THR1].  Returns 1 (DC mode) when at least
 * PP_THR2 pairs qualify, 0 otherwise. */
static inline int pp_deblock_isDC_mode( uint8_t *p_v )
{
    unsigned int i_flat = 0;
    int i_pos;

    for( i_pos = 0; i_pos < 9; i_pos++ )
    {
        /* |d| <= THR1 is tested as (d + THR1) <= 2*THR1 with one compare:
         * a negative sum stays far above 2*THR1 once masked to 16 bits */
        if( ( ( p_v[i_pos] - p_v[i_pos + 1] + PP_THR1 ) & 0xffff )
                <= PP_2xTHR1 )
        {
            i_flat++;
        }
    }

    return ( i_flat >= PP_THR2 ) ? 1 : 0;
}
/* Check that the dynamic range of the eight inner samples p_v[1]..p_v[8]
 * is small enough for DC-mode filtering.
 *
 * Returns 1 when (max - min) < 2 * i_QP, 0 otherwise.  p_v[0] and p_v[9]
 * are not looked at. */
static inline int pp_deblock_isMinMaxOk( uint8_t *p_v, int i_QP )
{
    int i_lo = p_v[1];
    int i_hi = p_v[1];
    int i_pos;

    for( i_pos = 2; i_pos <= 8; i_pos++ )
    {
        const int i_pix = p_v[i_pos];

        if( i_pix > i_hi )
        {
            i_hi = i_pix;
        }
        if( i_pix < i_lo )
        {
            i_lo = i_pix;
        }
    }

    return ( i_hi - i_lo < 2 * i_QP ) ? 1 : 0;
}
/* Default-mode deblocking of one line of 10 samples; the block edge lies
 * between i_v[4] and i_v[5] and only those two samples may change.
 *
 * With a3x0 = 2*(v3 - v6) + 5*(v5 - v4), nothing is done unless
 * 0 < |a3x0| < 8*i_QP.  Otherwise a correction
 *     d = 5 * ( |a3x0| - min(|a3x0|, |a3x1|, |a3x2|) ) / 8
 * is given the sign opposite to a3x0, clipped to the range between 0 and
 * (v4 - v5)/2, subtracted from v4 and added to v5.  When the sign of the
 * correction and the sign of (v4 - v5)/2 disagree, nothing is changed.
 *
 * i_stride is not used. */
static inline void pp_deblock_DefaultMode( uint8_t i_v[10],
                                           int i_stride, int i_QP )
{
    int i_a0, i_a1, i_a2, i_a0_min;
    int i_corr, i_limit;
    int b_neg;

    (void)i_stride;

    i_a0  = 2 * ( i_v[3] - i_v[6] ) + 5 * ( i_v[5] - i_v[4] );
    b_neg = ( i_a0 < 0 );
    if( b_neg )
    {
        i_a0 = -i_a0;
    }

    /* From here on i_a0 is |a3x0| */
    if( i_a0 == 0 || i_a0 >= 8 * i_QP )
    {
        return;
    }

    i_a1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
    if( i_a1 < 0 )
    {
        i_a1 = -i_a1;
    }
    i_a2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
    if( i_a2 < 0 )
    {
        i_a2 = -i_a2;
    }

    i_a0_min = PP_MIN3( i_a0, i_a1, i_a2 );
    i_corr   = 5 * ( i_a0 - i_a0_min ) / 8;     /* never negative */
    i_limit  = ( i_v[4] - i_v[5] ) / 2;

    /* The true correction has the sign opposite to a3x0; it is applied only
     * when that sign matches the sign of i_limit, and is clipped to it. */
    if( i_limit < 0 )
    {
        if( b_neg )
        {
            return;
        }
        i_corr = -i_corr;
        if( i_corr < i_limit )
        {
            i_corr = i_limit;
        }
    }
    else
    {
        if( !b_neg )
        {
            return;
        }
        if( i_corr > i_limit )
        {
            i_corr = i_limit;
        }
    }

    i_v[4] -= i_corr;
    i_v[5] += i_corr;
}
/* DC-mode deblocking of one line of 10 samples.
 *
 * p_v[1]..p_v[8] are replaced by a 9-tap low-pass of the original line,
 * taps { 1, 1, 2, 2, 4, 2, 2, 1, 1 } / 16.  Positions left of p_v[1] are
 * padded with p0 and positions right of p_v[8] with p9, where
 *     p0 = p_v[0] if |p_v[1] - p_v[0]| < i_QP, else p_v[1]
 *     p9 = p_v[9] if |p_v[8] - p_v[9]| < i_QP, else p_v[8]
 * p_v[0] and p_v[9] themselves are never written. */
static inline void pp_deblock_DCMode( uint8_t *p_v, /* = int i_v[10] */
                                      int i_QP )
{
    static const int pi_taps[9] = { 1, 1, 2, 2, 4, 2, 2, 1, 1 };
    int pi_pad[16];     /* pi_pad[m + 3] is the padded sample m, m = -3..12 */
    int i_p0, i_p9;
    int i_pos, i_tap;

    i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
    i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];

    /* Snapshot the line with its padding so the filter reads only
     * unmodified samples */
    for( i_pos = 0; i_pos < 4; i_pos++ )
    {
        pi_pad[i_pos] = i_p0;
    }
    for( i_pos = 1; i_pos < 9; i_pos++ )
    {
        pi_pad[i_pos + 3] = p_v[i_pos];
    }
    for( i_pos = 12; i_pos < 16; i_pos++ )
    {
        pi_pad[i_pos] = i_p9;
    }

    for( i_pos = 1; i_pos < 9; i_pos++ )
    {
        int i_sum = 0;

        /* tap i_tap weights padded sample (i_pos + i_tap - 4) */
        for( i_tap = 0; i_tap < 9; i_tap++ )
        {
            i_sum += pi_taps[i_tap] * pi_pad[i_pos + i_tap - 1];
        }
        p_v[i_pos] = i_sum >> 4;
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ---------- filter Vertical lines so follow horizontal edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* Deblock across horizontal block edges (filtering runs down each column).
 *
 * For every edge row y = 8, 16, ... below i_height - 4 and every column x,
 * the ten samples from row y-5 to row y+4 are copied into a scratch line,
 * filtered in DC or default mode, and rows y-4..y+3 are written back.
 *
 * The QP is read at [(y >> s) * i_QP_stride + (x >> s)], with s = 5 when
 * b_chroma is set and 4 otherwise.
 * NOTE(review): chroma planes are passed in at half size yet use the larger
 * shift -- confirm this QP mapping is intended. */
void E_( pp_deblock_V )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_QP_scale = b_chroma ? 5 : 4;
    int x, y, i;

    for( y = 8; y < i_height - 4; y += 8 )
    {
        uint8_t *p_top = p_plane + ( y - 5 ) * i_stride;

        for( x = 0; x < i_width; x++ )
        {
            uint8_t  pi_col[10];
            uint8_t *p_src = p_top + x;
            int      i_QP;

            /* Gather the column so the helpers can ignore the stride */
            for( i = 0; i < 10; i++ )
            {
                pi_col[i] = p_src[i * i_stride];
            }

            /* XXX QP is for v5 */
            i_QP = p_QP_store[( y >> i_QP_scale ) * i_QP_stride +
                              ( x >> i_QP_scale )];

            if( !pp_deblock_isDC_mode( pi_col ) )
            {
                pp_deblock_DefaultMode( pi_col, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( pi_col, i_QP ) )
            {
                pp_deblock_DCMode( pi_col, i_QP );
            }

            /* Scatter back; only samples 1..8 can have changed */
            for( i = 1; i < 9; i++ )
            {
                p_src[i * i_stride] = pi_col[i];
            }
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* --------- filter Horizontal lines so follow vertical edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* Deblock across vertical block edges (filtering runs along each row).
 *
 * For every row y and every edge column x = 8, 16, ... below i_width - 4,
 * the ten samples starting five pixels left of x are filtered in place in
 * DC or default mode.
 *
 * The QP is read at [(y >> s) * i_QP_stride + (x >> s)], with s = 5 when
 * b_chroma is set and 4 otherwise.
 * NOTE(review): chroma planes are passed in at half size yet use the larger
 * shift -- confirm this QP mapping is intended. */
void E_( pp_deblock_H )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_QP_scale = b_chroma ? 5 : 4;
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        /* Shifted by -5 so that p_line + x sits 5 pixels before edge x */
        uint8_t *p_line = p_plane + y * i_stride - 5;

        for( x = 8; x < i_width - 4; x += 8 )
        {
            uint8_t  *p_v  = p_line + x;
            /* XXX QP is for v5 */
            const int i_QP = p_QP_store[( y >> i_QP_scale ) * i_QP_stride +
                                        ( x >> i_QP_scale )];

            if( !pp_deblock_isDC_mode( p_v ) )
            {
                pp_deblock_DefaultMode( p_v, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( p_v, i_QP ) )
            {
                pp_deblock_DCMode( p_v, i_QP );
            }
        }
    }
}
/*****************************************************************************
*
* Internals functions common to pp_Dering_Y pp_Dering_C
*
*****************************************************************************/
/* Find the smallest and largest sample of an 8x8 block.
 *
 * p_block   top-left sample of the block
 * i_stride  distance in bytes between two rows
 * pi_min    receives the minimum
 * pi_max    receives the maximum */
static inline void pp_dering_MinMax( uint8_t *p_block, int i_stride,
                                     int *pi_min, int *pi_max )
{
    int i_lo = 255;
    int i_hi = 0;
    int i_row, i_col;

    for( i_row = 0; i_row < 8; i_row++, p_block += i_stride )
    {
        for( i_col = 0; i_col < 8; i_col++ )
        {
            const int i_pix = p_block[i_col];

            if( i_pix < i_lo )
            {
                i_lo = i_pix;
            }
            if( i_pix > i_hi )
            {
                i_hi = i_pix;
            }
        }
    }

    *pi_min = i_lo;
    *pi_max = i_hi;
}
/* Build the binary index of a 10x10 window, one 32-bit word per row.
 *
 * For each row, bit x (x = 0..9) of a mask is set when sample x is above
 * i_thr, and the complement of that mask is placed in the upper 16 bits.
 * The stored word keeps a bit only where it and both horizontal neighbours
 * are set, so the low half marks three consecutive "above" samples and the
 * high half three consecutive "not above" samples.
 *
 * p_bin must have room for 10 words. */
static inline void pp_dering_BinIndex( uint8_t *p_block, int i_stride,
                                       int i_thr, uint32_t *p_bin )
{
    int i_row, i_col;

    for( i_row = 0; i_row < 10; i_row++ )
    {
        const uint8_t *p_line = p_block + i_row * i_stride;
        uint32_t i_mask = 0;

        for( i_col = 0; i_col < 10; i_col++ )
        {
            if( p_line[i_col] > i_thr )
            {
                i_mask |= 1 << i_col;
            }
        }

        i_mask |= ( ~i_mask ) << 16;    /* so runs of three 0 are detected too */
        p_bin[i_row] = i_mask & ( i_mask >> 1 ) & ( i_mask << 1 );
    }
}
/* Adaptive smoothing of one 8x8 block, in place.
 *
 * p_block   top-left sample of the block; the surrounding one-pixel border
 *           is read as well
 * i_stride  distance in bytes between two rows
 * p_bin     the 10 row words produced by pp_dering_BinIndex for the 10x10
 *           window around the block
 * i_QP      quantizer; a filtered sample may move by at most i_QP >> 1
 *
 * A sample is filtered when, in the three window rows around it, the
 * corresponding bit is set in either half of the AND of the row words.
 * Filtered samples get the 3x3 kernel
 *     1 2 1
 *     2 4 2   (+ 8) >> 4
 *     1 2 1
 * All results are computed from the unmodified block and stored afterwards. */
static inline void pp_dering_Filter( uint8_t *p_block, int i_stride,
                                     uint32_t *p_bin, int i_QP )
{
    const int i_clamp = i_QP >> 1;
    int pi_out[8][8];
    int i_row, i_col;

    for( i_row = 0; i_row < 8; i_row++ )
    {
        const uint8_t *p_cur = p_block + i_row * i_stride;
        const uint8_t *p_up  = p_cur - i_stride;
        const uint8_t *p_dn  = p_cur + i_stride;
        uint32_t i_sel = p_bin[i_row] & p_bin[i_row + 1] & p_bin[i_row + 2];

        i_sel |= i_sel >> 16;   /* accept a 3x3 of all ones or all zeros */

        for( i_col = 0; i_col < 8; i_col++ )
        {
            const int i_pix = p_cur[i_col];
            int i_f;

            /* Window column index is i_col + 1, hence the bit offset */
            if( !( ( i_sel >> ( i_col + 1 ) ) & 1 ) )
            {
                pi_out[i_row][i_col] = i_pix;
                continue;
            }

            i_f =   p_up [i_col - 1] + ( p_up [i_col] << 1 ) + p_up [i_col + 1]
                + ( p_cur[i_col - 1] << 1 ) + ( p_cur[i_col] << 2 )
                + ( p_cur[i_col + 1] << 1 )
                +   p_dn [i_col - 1] + ( p_dn [i_col] << 1 ) + p_dn [i_col + 1];
            i_f = ( 8 + i_f ) >> 4;

            /* Limit the change to +/- i_clamp around the original sample */
            if( i_f - i_pix > i_clamp )
            {
                pi_out[i_row][i_col] = i_pix + i_clamp;
            }
            else if( i_f - i_pix < -i_clamp )
            {
                pi_out[i_row][i_col] = i_pix - i_clamp;
            }
            else
            {
                pi_out[i_row][i_col] = i_f;
            }
        }
    }

    for( i_row = 0; i_row < 8; i_row++ )
    {
        uint8_t *p_dst = p_block + i_row * i_stride;

        for( i_col = 0; i_col < 8; i_col++ )
        {
            p_dst[i_col] = pi_out[i_row][i_col];
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ----------------- Dering filter on Y and C blocks ----------------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* Deringing filter for a luma plane, in place.
 *
 * The plane is walked in groups of four 8x8 blocks (a 16x16 area), starting
 * at (8, 8) and stopping 8 pixels before the right and bottom borders:
 *
 *     +-+-+
 *     |0|1|
 *     +-+-+
 *     |2|3|
 *     +-+-+
 *
 * For each group:
 *   1. per-block min, max, range = max - min and threshold
 *      thr = (max + min + 1) / 2 are computed; the block with the largest
 *      range is remembered;
 *   2. thresholds are rearranged: a block whose range is below 16 gets
 *      threshold 0; if the largest range exceeds 64, a block whose range is
 *      below 32 takes the threshold of the largest-range block;
 *   3. the binary index of the 10x10 window around each block is built and
 *      the adaptive smoothing filter is applied, each block using the QP
 *      entry at its own position in the 2x2 group.
 *
 * Fixes relative to the previous revision:
 *   - the range/threshold loops indexed k = 0..4 and k = 1..4 on 4-element
 *     arrays, i.e. accessed element 4 out of bounds and never rearranged
 *     block 0; they now cover k = 0..3;
 *   - the "largest range" search compared against i_max[k] instead of
 *     i_range[k].
 *
 * p_plane      top-left sample of the plane
 * i_width      plane width in pixels
 * i_height     plane height in pixels
 * i_stride     distance in bytes between two rows
 * p_QP_store   quantizer table, indexed by (y >> 4) * i_QP_stride + (x >> 4)
 * i_QP_stride  row length of p_QP_store */
void E_( pp_dering_Y )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int x, y, k;
    int i_max[4], i_min[4], i_range[4];
    int i_thr[4];
    int i_max_range, i_kmax;
    uint32_t i_bin[4][10];
    uint8_t *p_block[4];
    QT_STORE_T *p_QP;

    /* We process 4 blocks/loop */
    for( y = 8; y < i_height - 8; y += 16 )
    {
        p_block[0] = p_plane + y * i_stride + 8;
        p_block[1] = p_block[0] + 8;
        p_block[2] = p_block[0] + ( i_stride << 3 );
        p_block[3] = p_block[2] + 8;

        for( x = 8; x < i_width - 8; x += 16 )
        {
            /* 1: Calculate threshold */
            /* Calculate max/min for each block */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_MinMax( p_block[k], i_stride,
                                  &i_min[k], &i_max[k] );
            }

            /* Calculate range, max_range and thr */
            i_max_range = 0;
            i_kmax = 0;
            for( k = 0; k < 4; k++ )
            {
                i_range[k] = i_max[k] - i_min[k];
                i_thr[k]   = ( i_max[k] + i_min[k] + 1 ) / 2;
                if( i_max_range < i_range[k] )
                {
                    i_max_range = i_range[k];
                    i_kmax = k;
                }
            }

            /* Now rearrange thr.  The largest-range block is never touched
             * here when i_max_range > 64, so i_thr[i_kmax] is stable. */
            for( k = 0; k < 4; k++ )
            {
                if( i_range[k] < 16 )
                {
                    i_thr[k] = 0;
                }
                else if( i_max_range > 64 && i_range[k] < 32 )
                {
                    i_thr[k] = i_thr[i_kmax];
                }
            }

            /* 2: Index acquisition 10x10 ! so " -i_stride - 1" */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_BinIndex( p_block[k] - i_stride - 1, i_stride,
                                    i_thr[k], i_bin[k] );
            }

            /* 3: adaptive smoothing */
            /* since we begin at (8,8) QP can be different for each block */
            p_QP = &( p_QP_store[( y >> 4 ) * i_QP_stride + ( x >> 4 )] );

            pp_dering_Filter( p_block[0], i_stride, i_bin[0],
                              p_QP[0] );
            pp_dering_Filter( p_block[1], i_stride, i_bin[1],
                              p_QP[1] );
            pp_dering_Filter( p_block[2], i_stride, i_bin[2],
                              p_QP[i_QP_stride] );
            pp_dering_Filter( p_block[3], i_stride, i_bin[3],
                              p_QP[i_QP_stride + 1] );

            for( k = 0; k < 4; k++ )
            {
                p_block[k] += 8;
            }
        }
    }
}
/* Deringing filter for one chroma plane, in place.
 *
 * Every 8x8 block whose top-left corner lies at a multiple of 8, from (8, 8)
 * up to 8 pixels before the right and bottom borders, is processed on its
 * own: threshold = (max + min + 1) / 2 of the block, binary index of the
 * surrounding 10x10 window, then adaptive smoothing.
 *
 * The QP is read at [(y >> 5) * i_QP_stride + (x >> 5)].
 * NOTE(review): x and y are chroma-plane coordinates here -- confirm that a
 * shift of 5 is the intended QP mapping. */
void E_( pp_dering_C )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int x, y;

    for( y = 8; y < i_height - 8; y += 8 )
    {
        uint8_t *p_block = p_plane + y * i_stride + 8;

        for( x = 8; x < i_width - 8; x += 8, p_block += 8 )
        {
            uint32_t i_bin[10];
            int i_min, i_max, i_thr;

            /* 1: threshold from the block's extremes */
            pp_dering_MinMax( p_block, i_stride, &i_min, &i_max );
            i_thr = ( i_max + i_min + 1 ) / 2;

            /* 2: index acquisition on the 10x10 window holding the block */
            pp_dering_BinIndex( p_block - i_stride - 1, i_stride,
                                i_thr, i_bin );

            /* 3: adaptive smoothing */
            pp_dering_Filter( p_block, i_stride, i_bin,
                              p_QP_store[( y >> 5 ) * i_QP_stride +
                                         ( x >> 5 )] );
        }
    }
}
modules/codec/ffmpeg/postprocessing/postprocessing_common.h
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_common.h
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_common.h,v 1.2 2002/08/08 22:28:22 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#if 0
#define PP_USE_3DNOW /* Nothing done yet */
#define PP_USE_MMX /* when only MMX is supported */
#define PP_USE_MMXEXT /* when MMXEXT is also supported, imply MMX */
#endif
/* Thresholds for deblocking, values given by ISO.  They are unsigned long
 * long constants because the MMX code builds 64-bit masks from them. */
#define PP_THR1 2ULL
/* twice PP_THR1, internal usage */
#define PP_2xTHR1 ( 2 * PP_THR1 )
#define PP_THR2 6ULL
/* Some useful macros.
 * Every argument is parenthesized at each use so that an argument containing
 * a low-precedence operator (&, |, ?:, ...) is compared as a whole.
 * Arguments are still evaluated more than once: do not pass expressions with
 * side effects. */
#define PP_MAX( a, b ) ( (a) > (b) ? (a) : (b) )
#define PP_MIN( a, b ) ( (a) < (b) ? (a) : (b) )
#define PP_ABS( x ) ( ( (x) < 0 ) ? (-(x)) : (x) )
#define PP_SGN( x ) ( ( (x) < 0 ) ? -1 : 1 )
#define PP_MIN3( a, b, c ) ( PP_MIN( (a), PP_MIN( (b), (c) ) ) )
#define PP_CLIP( x, a, b ) ( PP_MAX( (a), PP_MIN( (x), (b) ) ) )
/* Filter entry points implemented once per build variant (E_() decorates the
 * name).  They are declared without a parameter list, so the compiler does
 * not check call arguments against the definitions. */
void E_( pp_deblock_V )();
void E_( pp_deblock_H )();

void E_( pp_dering_Y )();
void E_( pp_dering_C )();
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_mmx.c: Post Processing library in MMX
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_mmx.c,v 1.3 2002/12/18 14:17:10 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <vlc/vlc.h>
/* only use uint8_t, uint32_t .... */
#include "postprocessing.h"
#include "postprocessing_common.h"
/*****************************************************************************
*
* Internals functions common to pp_Deblock_V and pp_Deblock_H
*
*****************************************************************************/
/*****************************************************************************
* MMX stuff
*****************************************************************************/
/* XXX PP_THR1 needs to be defined as ULL: the constants below are built with
 * 64-bit arithmetic. */
/* Same trick as in the idct code: each constant is given an assembler-level
 * symbol equal to its C name, so the inline asm can reference it by name;
 * the "unused" attribute silences the warning since C code never reads it. */
#define UNUSED_LONGLONG( foo ) \
    static const unsigned long long foo __asm__ (#foo) __attribute__((unused))

/* Used to compute isDC_mode with MMX.  Multiplying a value below 256 by
 * 0x0101010101010101 replicates it into each of the eight bytes. */

/* eight bytes of (127 - THR1) */
UNUSED_LONGLONG( mmx_127_thr1 ) =
    ( 127ULL - PP_THR1 ) * 0x0101010101010101ULL;

/* eight bytes of (127 - 2*THR1 - 1) */
UNUSED_LONGLONG( mmx_127_2xthr1_1 ) =
    ( 127ULL - PP_2xTHR1 - 1 ) * 0x0101010101010101ULL;

/* four 16-bit words, most significant first: -2, 5, -5, 2 */
UNUSED_LONGLONG( mmx_m2_5_m5_2 ) = 0xfffe0005fffb0002ULL;
#if 0
/* find the minimum byte of r and store it in r; t is destroyed */
#define MMXEXT_GET_PMIN( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pminub " #t ", " #r " \n"
/* find mzx bytes from r ans set it in r, t is destroyed */
#define MMXEXT_GET_PMAX( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n"
#define MMXEXT_GET_LMINMAX( s, m, M, t ) \
"movq " #s ", " #t " \n" \
"pminub " #t ", " #m " \n" \
"pmaxub " #t ", " #M " \n"
/* Some tips for MMX
* |a-b| :
d1 = a - b with unsigned saturate
d2 = b - a with ...
|a-b| = d1 | d2
*/
#endif
/****************************************************************************
* pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
****************************************************************************
* Use constant PP_THR1 and PP_THR2 ( PP_2xTHR1 )
*
* Called for each pixel on a block boundary when deblocking,
* so it needs to be fast.
*
****************************************************************************/
static
inline
int
pp_deblock_isDC_mode
(
uint8_t
*
p_v
)
{
unsigned
int
i_eq_cnt
;
/* algo :
x = v[i] - v[i+1] without signed saturation
( XXX see if there is'nt problem, but can't be with signed
sat because pixel will be saturate :(
so x within [-128, 127] and we have to test if it fit in [-M, M]
we add 127-M with wrap around -> good value fit in [ 127-2*M, 127]
and if x >= 127 - 2 * M ie x > 127 -2*M - 1 value is good
*/
__asm__
__volatile__
(
" #* Do (v0-v1) to (v7-v8)
\n
"
"movq (%1), %%mm1 # load v0->v7
\n
"
"movq 1(%1), %%mm2 # load v1->v8
\n
"
"psubb %%mm2, %%mm1 # v[i]-v[i+1]
\n
"
"paddb mmx_127_thr1, %%mm1 # + 127-THR1 with wrap
\n
"
"pcmpgtb mmx_127_2xthr1_1, %%mm1 # > 127 -2*thr1 - 1
\n
"
"movq %%mm1, %%mm0 #
\n
"
"psrlw $8, %%mm1 #
\n
"
"paddb %%mm1, %%mm0 #
\n
"
" #
\n
"
"movq %%mm0, %%mm1 # Now sum to create eq_cnt
\n
"
"psrld $16, %%mm0 #
\n
"
"paddb %%mm0, %%mm1 #
\n
"
" #
\n
"
"movq %%mm1, %%mm0 #
\n
"
"psrlq $32, %%mm1 #
\n
"
"paddb %%mm1, %%mm0
\n
"
"movd %%mm0, %0 #
\n
"
"negl %0
\n
"
"andl $255, %0"
:
"=r"
(
i_eq_cnt
)
:
"r"
(
p_v
)
);
/* last test, hey, 9 don't fit in MMX */
if
((
(
p_v
[
8
]
-
p_v
[
9
]
+
PP_THR1
)
&
0xffff
)
<=
PP_2xTHR1
)
{
i_eq_cnt
++
;
}
#if 0
/* algo : if ( | v[i] -v[i+1] | <= PP_THR1 ) { i_eq_cnt++; } */
i_eq_cnt = 0;
for( i =0; i < 9; i++ )
{
if(( ( p_v[i] - p_v[i+1] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
{
i_eq_cnt++;
}
}
#endif
return
(
(
i_eq_cnt
>=
PP_THR2
)
?
1
:
0
);
}
/* Check that the eight pixels p_v[1..8] are close enough to each other.
 *
 * p_v     : 10-pixel run; only entries 1..8 are examined
 * i_QP    : quantiser of the block
 * returns : 1 when ( max - min ) of those pixels is below 2 * i_QP, else 0
 */
static inline int pp_deblock_isMinMaxOk( uint8_t *p_v, int i_QP )
{
    const uint8_t *p_pix = p_v + 1;
    const uint8_t *p_end = p_v + 9;
    int i_lo = *p_pix;
    int i_hi = *p_pix;

    while( ++p_pix < p_end )
    {
        const int i_cur = *p_pix;

        if( i_cur > i_hi )
        {
            i_hi = i_cur;
        }
        if( i_cur < i_lo )
        {
            i_lo = i_cur;
        }
    }

    if( i_hi - i_lo < 2 * i_QP )
    {
        return 1;
    }
    return 0;
}
/* Default-mode deblocking of one 10-pixel run; only i_v[4] and i_v[5], the
 * two pixels on either side of the block edge, may be modified.
 *
 *   a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 * ( i_v[5] - i_v[4] )
 *   a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] )
 *   a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] )
 *
 * When 0 < |a3x0| < 8 * i_QP the correction
 *   d = 5 * ( |a3x0| - min( |a3x0|, |a3x1|, |a3x2| ) ) / 8
 * is given the sign opposite to a3x0, clipped to ( i_v[4] - i_v[5] ) / 2,
 * and applied as i_v[4] -= d, i_v[5] += d.  Nothing is changed when that
 * sign disagrees with the sign of ( i_v[4] - i_v[5] ) / 2.
 *
 * i_v      : the 10 pixels across the edge
 * i_stride : unused, kept for the callers
 * i_QP     : quantiser of the block
 */
static inline void pp_deblock_DefaultMode( uint8_t i_v[10],
                                           int i_stride, int i_QP )
{
    int d, i_delta;
    int a3x0, a3x0_, a3x1, a3x2;
    int b_neg;

    (void)i_stride;

    /* a3x0: widen i_v[3..6] to words and multiply-add with (2,-5,5,-2).
     * "memory": the asm reads i_v through a register operand. */
    __asm__ __volatile__ (
        "pxor      %%mm7, %%mm7            # mm7 = 0               \n"
        "movq      mmx_m2_5_m5_2, %%mm6    # mm6 =(2,-5,5,-2)      \n"
        "movd      3(%1), %%mm0            \n"
        "punpcklbw %%mm7, %%mm0            \n"
        "pmaddwd   %%mm6, %%mm0            \n"
        "movq      %%mm0, %%mm1            \n"
        "psrlq     $32, %%mm1              \n"
        "paddd     %%mm1, %%mm0            \n"
        "movd      %%mm0, %0"
        : "=r"( a3x0 )
        : "r"( i_v )
        : "memory" );

    if( a3x0 < 0 )
    {
        b_neg = 1;
        a3x0 = -a3x0;
    }
    else
    {
        b_neg = 0;
    }
    /* From here on a3x0 holds abs( a3x0 ) */

    if( ( a3x0 < 8 * i_QP ) && ( a3x0 != 0 ) )
    {
        /* a3x1 and a3x2.  mm6/mm7 are reloaded here so that this statement
         * does not depend on register contents surviving from the previous
         * asm statement, which the compiler does not guarantee. */
        __asm__ __volatile__ (
            "pxor      %%mm7, %%mm7            # mm7 = 0               \n"
            "movq      mmx_m2_5_m5_2, %%mm6    # mm6 =(2,-5,5,-2)      \n"
            "movd      1(%2), %%mm0            \n"
            "movd      5(%2), %%mm2            \n"
            "punpcklbw %%mm7, %%mm0            \n"
            "punpcklbw %%mm7, %%mm2            \n"
            "pmaddwd   %%mm6, %%mm0            \n"
            "pmaddwd   %%mm6, %%mm2            \n"
            "movq      %%mm0, %%mm1            \n"
            "psrlq     $32, %%mm1              \n"
            "paddd     %%mm1, %%mm0            # mm0 = a3x1            \n"
            "movd      %%mm0, %0               \n"
            "movq      %%mm2, %%mm1            \n"
            "psrlq     $32, %%mm1              \n"
            "paddd     %%mm1, %%mm2            # mm2 = a3x2            \n"
            "movd      %%mm2, %1               \n"
            : "=r"( a3x1 ), "=r"( a3x2 )
            : "r"( i_v )
            : "memory" );

        if( a3x1 < 0 ) a3x1 = -a3x1; /* abs( a3x1 ) */
        if( a3x2 < 0 ) a3x2 = -a3x2; /* abs( a3x2 ) */

        a3x0_ = PP_MIN3( a3x0, a3x1, a3x2 );

        d = 5 * ( a3x0 - a3x0_ ) / 8; /* never negative */

        i_delta = ( i_v[4] - i_v[5] ) / 2;

        /* clip into [0, i_delta] or [i_delta, 0] */
        if( i_delta < 0 )
        {
            if( !b_neg ) /* the true d has sgn(d) = - sgn( a3x0 ) */
            {
                d = -d;
                if( d < i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
        else
        {
            if( b_neg )
            {
                if( d > i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
    }
}
/* DC-mode deblocking: smooth the eight pixels p_v[1..8] of a 10-pixel run.
 *
 * p0 is p_v[0] when |p_v[1] - p_v[0]| < i_QP, otherwise p_v[1];
 * p9 is p_v[9] when |p_v[8] - p_v[9]| < i_QP, otherwise p_v[8].
 * With v[] the unmodified input, the MMX code evaluates:
 *
 *   p_v[1] = ( 6*p0 + 4*v[1] + 2*(v[2]+v[3]) + v[4] + v[5] ) >> 4
 *   p_v[2] = ( 4*p0 + 2*v[1] + 4*v[2] + 2*(v[3]+v[4]) + v[5] + v[6] ) >> 4
 *   p_v[3] = ( 2*p0 + 2*(v[1]+v[2]) + 4*v[3] + 2*(v[4]+v[5])
 *              + v[6] + v[7] ) >> 4
 *   p_v[4] = ( p0 + v[1] + 2*(v[2]+v[3]) + 4*v[4] + 2*(v[5]+v[6])
 *              + v[7] + v[8] ) >> 4
 *   p_v[5] = ( v[1] + v[2] + 2*(v[3]+v[4]) + 4*v[5] + 2*(v[6]+v[7])
 *              + v[8] + p9 ) >> 4
 *   p_v[6] = ( v[2] + v[3] + 2*(v[4]+v[5]) + 4*v[6] + 2*(v[7]+v[8])
 *              + 2*p9 ) >> 4
 *   p_v[7] = ( v[3] + v[4] + 2*(v[5]+v[6]) + 4*v[7] + 2*v[8] + 4*p9 ) >> 4
 *   p_v[8] = ( v[4] + v[5] + 2*(v[6]+v[7]) + 4*v[8] + 6*p9 ) >> 4
 *
 * Register use: mm0 = the 8 input pixels, mm1 = scratch (4 words),
 * mm2 = accumulator for p_v[1..4], mm3 = accumulator for p_v[5..8],
 * mm5 = p0, mm6 = p9 << 48, mm7 = 0.
 */
static inline void pp_deblock_DCMode( uint8_t *p_v, /* = int i_v[10] */
                                      int i_QP )
{
    const int i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
    const int i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];

    __asm__ __volatile__ (
        "pxor      %%mm7, %%mm7    \n"
        "movq      1(%0), %%mm0    # get 8 pix                     \n"
        "                          # unpack into mm1               \n"
        "movq      %%mm0, %%mm1    \n"
        "punpcklbw %%mm7, %%mm1    \n"
        "                          # get p_0 and i_p9              \n"
        "movd      %1, %%mm5       \n"
        "movd      %2, %%mm6       \n"
        "psllq     $48, %%mm6      \n"

        "movq      %%mm1, %%mm3    # p_v[5-8] = v[1-4] !!          \n"
        "movq      %%mm1, %%mm2    \n"
        "psllw     $2, %%mm2       # p_v[1-4] = 4*v[1-4]           \n"

        "psllq     $16, %%mm1      \n"
        "por       %%mm5, %%mm1    # mm1 =( p0, v1, v2 ,v3)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm2    \n"

        "psllq     $16, %%mm1      \n"
        "por       %%mm5, %%mm1    # mm1 =( p0, p0, v1, v2)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm2    \n"

        "psllq     $16, %%mm1      \n"
        "por       %%mm5, %%mm1    # mm1 =( p0, p0, p0, v1)        \n"
        "paddw     %%mm1, %%mm2    \n"

        "psllq     $16, %%mm1      \n"
        "por       %%mm5, %%mm1    # mm1 =( p0, p0, p0, p0)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "                          # Now last part a little borring \n"
        "                          # last part for mm2, beginig for mm3 \n"
        "movq      %%mm0, %%mm1    \n"
        "psrlq     $8, %%mm1       \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v2, v3, v4, v5 )       \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm3    \n"

        "movq      %%mm0, %%mm1    \n"
        "psrlq     $16, %%mm1      \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v3, v4, v5, v6 )       \n"
        "psllw     $1, %%mm1       \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm3    \n"

        "movq      %%mm0, %%mm1    \n"
        "psrlq     $24, %%mm1      \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v4, v5, v6, v7)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm3    \n"
        "paddw     %%mm1, %%mm3    \n"

        "movq      %%mm0, %%mm1    \n"
        "psrlq     $32, %%mm1      \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v5, v6, v7, v8)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "psllw     $2, %%mm1       \n"
        "paddw     %%mm1, %%mm3    \n"
        "                          # Now last part for last 4 pix  \n"
        "movq      %%mm0, %%mm1    \n"
        "punpckhbw %%mm7, %%mm1    # mm1 = ( v5, v6, v7, v8)       \n"

        "psrlq     $16, %%mm1      \n"
        "por       %%mm6, %%mm1    # mm1 =( v6, v7, v8, p9 )       \n"
        "paddw     %%mm1, %%mm3    \n"
        "paddw     %%mm1, %%mm3    \n"

        "psrlq     $16, %%mm1      \n"
        "por       %%mm6, %%mm1    # mm1 =( v7, v8, p9, p9)        \n"
        "paddw     %%mm1, %%mm3    \n"
        "paddw     %%mm1, %%mm3    \n"

        "psrlq     $16, %%mm1      \n"
        "por       %%mm6, %%mm1    # mm1 =( v8, p9, p9, p9 )       \n"
        "paddw     %%mm1, %%mm3    \n"

        "psrlq     $16, %%mm1      \n"
        "por       %%mm6, %%mm1    # mm1 =( p9, p9, p9, p9 )       \n"
        "paddw     %%mm1, %%mm3    \n"

        "psrlw     $4, %%mm2       \n"
        "psrlw     $4, %%mm3       \n"
        "packuswb  %%mm3, %%mm2    \n"
        "movq      %%mm2, 1(%0)    \n"
        :
        : "r"( p_v ), "r"( i_p0 ), "r"( i_p9 )
        : "memory" );
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ---------- filter Vertical lines so follow horizontal edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* Deblock across the horizontal block edges of a plane: for every edge row
 * y = 8, 16, ... ( y < i_height - 4 ) and every column, the 10 vertically
 * adjacent pixels of rows y-5 .. y+4 are gathered into a small buffer,
 * filtered in DC or default mode, and rows y-4 .. y+3 are written back.
 *
 * p_plane     : top-left pixel of the plane
 * i_width     : number of columns processed
 * i_height    : number of rows in the plane
 * i_stride    : distance in bytes between two rows
 * p_QP_store  : quantiser table, indexed by ( y >> s ) * i_QP_stride
 *               + ( x >> s ), with s = 5 for chroma and 4 otherwise
 * i_QP_stride : row pitch of p_QP_store
 * b_chroma    : non-zero when p_plane is a chroma plane
 *
 * NOTE(review): a shift of 5 for chroma looks unusual for planes that are
 * half the luma size -- confirm against the producer of p_QP_store.
 */
void E_( pp_deblock_V )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_shift = b_chroma ? 5 : 4;
    int i_row;

    for( i_row = 8; i_row < i_height - 4; i_row += 8 )
    {
        /* row 5 lines above the edge: first of the 10 rows used */
        uint8_t *p_top = p_plane + ( i_row - 5 ) * i_stride;
        int i_col;

        for( i_col = 0; i_col < i_width; i_col++ )
        {
            uint8_t a_pix[10];
            uint8_t *p_src = p_top + i_col;
            int i_QP;
            int k;

            /* gather the column so the helpers can work on a packed run */
            for( k = 0; k < 10; k++ )
            {
                a_pix[k] = p_src[k * i_stride];
            }

            /* the quantiser is the one of the block holding a_pix[5] */
            i_QP = p_QP_store[( i_row >> i_shift ) * i_QP_stride +
                              ( i_col >> i_shift )];

            if( !pp_deblock_isDC_mode( a_pix ) )
            {
                pp_deblock_DefaultMode( a_pix, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( a_pix, i_QP ) )
            {
                pp_deblock_DCMode( a_pix, i_QP );
            }

            /* scatter back; entries 0 and 9 are never modified */
            for( k = 1; k < 9; k++ )
            {
                p_src[k * i_stride] = a_pix[k];
            }
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* --------- filter Horizontal lines so follow vertical edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* Deblock across the vertical block edges of a plane: on every row, for
 * every edge column x = 8, 16, ... ( x < i_width - 4 ), the 10 pixels of
 * columns x-5 .. x+4 are filtered in place in DC or default mode.
 *
 * p_plane     : top-left pixel of the plane
 * i_width     : number of columns in the plane
 * i_height    : number of rows processed
 * i_stride    : distance in bytes between two rows
 * p_QP_store  : quantiser table, indexed by ( y >> s ) * i_QP_stride
 *               + ( x >> s ), with s = 5 for chroma and 4 otherwise
 * i_QP_stride : row pitch of p_QP_store
 * b_chroma    : non-zero when p_plane is a chroma plane
 */
void E_( pp_deblock_H )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_shift = b_chroma ? 5 : 4;
    int i_row;

    for( i_row = 0; i_row < i_height; i_row++ )
    {
        int i_col;

        for( i_col = 8; i_col < i_width - 4; i_col += 8 )
        {
            /* first of the 10 pixels: 5 pixels before the block edge */
            uint8_t *p_run = p_plane + i_row * i_stride + ( i_col - 5 );

            /* the quantiser is the one of the block holding p_run[5] */
            const int i_QP = p_QP_store[( i_row >> i_shift ) * i_QP_stride +
                                        ( i_col >> i_shift )];

            if( !pp_deblock_isDC_mode( p_run ) )
            {
                pp_deblock_DefaultMode( p_run, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( p_run, i_QP ) )
            {
                pp_deblock_DCMode( p_run, i_QP );
            }
        }
    }
}
/*****************************************************************************
*
* Internal functions common to pp_dering_Y and pp_dering_C
*
*****************************************************************************/
/* Find the smallest and largest pixel value of one 8x8 block.
 *
 * p_block  : top-left pixel of the block
 * i_stride : distance in bytes between two rows
 * pi_min   : receives the minimum pixel value
 * pi_max   : receives the maximum pixel value
 */
static inline void pp_dering_MinMax( uint8_t *p_block, int i_stride,
                                     int *pi_min, int *pi_max )
{
    const uint8_t *p_line = p_block;
    int i_lo = 255;
    int i_hi = 0;
    int i_row, i_col;

    for( i_row = 0; i_row < 8; i_row++, p_line += i_stride )
    {
        for( i_col = 0; i_col < 8; i_col++ )
        {
            const int i_pix = p_line[i_col];

            if( i_pix < i_lo )
            {
                i_lo = i_pix;
            }
            if( i_pix > i_hi )
            {
                i_hi = i_pix;
            }
        }
    }

    *pi_min = i_lo;
    *pi_max = i_hi;
}
/* Build the binary index of a 10x10 pixel window, one 32-bit word per row.
 *
 * For each row, bit x ( 0..9 ) of a mask is set when pixel x is above i_thr;
 * the complement of the mask is copied into the bits from 16 upwards so that
 * runs of pixels at or below the threshold are detected as well.  The value
 * stored has a bit set wherever the mask bit and both of its neighbours are
 * set, i.e. where three horizontally adjacent pixels lie on the same side of
 * the threshold.
 *
 * p_block  : top-left pixel of the 10x10 window
 * i_stride : distance in bytes between two rows
 * i_thr    : threshold
 * p_bin    : receives 10 words, one per row
 */
static inline void pp_dering_BinIndex( uint8_t *p_block, int i_stride,
                                       int i_thr, uint32_t *p_bin )
{
    const uint8_t *p_line = p_block;
    uint32_t *p_out = p_bin;
    int i_row;

    for( i_row = 0; i_row < 10; i_row++ )
    {
        uint32_t i_mask = 0;
        int i_col = 10;

        while( i_col-- > 0 )
        {
            if( p_line[i_col] > i_thr )
            {
                i_mask |= (uint32_t)1 << i_col;
            }
        }

        i_mask |= ( ~i_mask ) << 16; /* also detect three "below" pixels */

        *p_out++ = i_mask & ( i_mask >> 1 ) & ( i_mask << 1 );
        p_line += i_stride;
    }
}
/* Adaptive smoothing of one 8x8 block.
 *
 * A pixel is filtered when the binary index marks it in its own row and in
 * the rows above and below.  Filtered pixels are replaced by the rounded
 * 3x3 weighted average
 *
 *      1 2 1
 *      2 4 2   ( sum + 8 ) >> 4
 *      1 2 1
 *
 * limited to at most i_QP / 2 away from the original value.  All results
 * are computed from the unmodified block and written back at the end.
 *
 * p_block  : top-left pixel of the block; the one-pixel border around the
 *            block is read as well
 * i_stride : distance in bytes between two rows
 * p_bin    : the 10 row words produced by pp_dering_BinIndex
 * i_QP     : quantiser of the block
 */
static inline void pp_dering_Filter( uint8_t *p_block, int i_stride,
                                     uint32_t *p_bin, int i_QP )
{
    const int i_max_diff = i_QP >> 1;
    int a_out[8][8];
    int i_row, i_col;

    for( i_row = 0; i_row < 8; i_row++ )
    {
        const uint8_t *p_cur = p_block + i_row * i_stride;
        const uint8_t *p_up  = p_cur - i_stride;
        const uint8_t *p_dn  = p_cur + i_stride;
        uint32_t i_flags = p_bin[i_row] & p_bin[i_row + 1] & p_bin[i_row + 2];

        i_flags |= i_flags >> 16; /* merge the "above" and "below" halves */

        for( i_col = 0; i_col < 8; i_col++, i_flags >>= 1 )
        {
            const int i_pix = p_cur[i_col];
            int i_new = i_pix;

            /* bit 1, not bit 0: the index has 10 columns, the block uses
             * columns 1..8 of them */
            if( i_flags & 0x02 )
            {
                const int i_sum =
                      p_up[i_col - 1]  + ( p_up[i_col]  << 1 ) + p_up[i_col + 1]
                    + ( p_cur[i_col - 1] << 1 ) + ( p_cur[i_col] << 2 )
                    + ( p_cur[i_col + 1] << 1 )
                    + p_dn[i_col - 1]  + ( p_dn[i_col]  << 1 ) + p_dn[i_col + 1];

                i_new = ( 8 + i_sum ) >> 4;

                if( i_new - i_pix > i_max_diff )
                {
                    i_new = i_pix + i_max_diff;
                }
                else if( i_new - i_pix < -i_max_diff )
                {
                    i_new = i_pix - i_max_diff;
                }
            }

            a_out[i_row][i_col] = i_new;
        }
    }

    for( i_row = 0; i_row < 8; i_row++ )
    {
        uint8_t *p_dst = p_block + i_row * i_stride;

        for( i_col = 0; i_col < 8; i_col++ )
        {
            p_dst[i_col] = a_out[i_row][i_col];
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ----------------- Dering filter on Y and C blocks ----------------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* Dering filter for a luma plane.
 *
 * The plane is walked in groups of four 8x8 blocks,
 *
 *      +-+-+
 *      |0|1|
 *      +-+-+
 *      |2|3|
 *      +-+-+
 *
 * starting at (8,8) and advancing 16 pixels per step.  For each group:
 *   1. per-block min/max give a range and a threshold ( max + min + 1 ) / 2;
 *      the thresholds are then rearranged using the largest range of the
 *      group;
 *   2. a 10x10 binary index is built around each block;
 *   3. each block is smoothed with its own quantiser.
 *
 * p_plane     : top-left pixel of the plane
 * i_width     : number of columns in the plane
 * i_height    : number of rows in the plane
 * i_stride    : distance in bytes between two rows
 * p_QP_store  : quantiser table, one entry per 16x16 pixels
 * i_QP_stride : row pitch of p_QP_store
 *
 * NOTE(review): the last group reaches row y + 16 and column x + 16; this
 * assumes the plane has enough rows/columns -- confirm with the callers.
 */
void E_( pp_dering_Y )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int x, y, k;
    int i_max[4], i_min[4], i_range[4];
    int i_thr[4];
    int i_max_range, i_kmax;
    uint32_t i_bin[4][10];
    uint8_t *p_block[4];
    QT_STORE_T *p_QP;

    /* 4 blocks are processed per iteration */
    for( y = 8; y < i_height - 8; y += 16 )
    {
        p_block[0] = p_plane + y * i_stride + 8;
        p_block[1] = p_block[0] + 8;
        p_block[2] = p_block[0] + ( i_stride << 3 );
        p_block[3] = p_block[2] + 8;

        for( x = 8; x < i_width - 8; x += 16 )
        {
            /* 1: Calculate threshold */
            /* min/max of each block */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_MinMax( p_block[k], i_stride,
                                  &i_min[k], &i_max[k] );
            }

            /* range and threshold of each block, largest range of the
             * group.  The arrays hold 4 entries, so k runs over 0..3. */
            i_max_range = 0;
            i_kmax = 0;
            for( k = 0; k < 4; k++ )
            {
                i_range[k] = i_max[k] - i_min[k];
                i_thr[k] = ( i_max[k] + i_min[k] + 1 ) / 2;
                if( i_max_range < i_range[k] )
                {
                    i_max_range = i_range[k];
                    i_kmax = k;
                }
            }

            /* Now rearrange thr */
            if( i_max_range > 64 )
            {
                for( k = 0; k < 4; k++ )
                {
                    if( i_range[k] < 16 )
                    {
                        i_thr[k] = 0;
                    }
                    else if( i_range[k] < 32 )
                    {
                        i_thr[k] = i_thr[i_kmax];
                    }
                }
            }
            else
            {
                for( k = 0; k < 4; k++ )
                {
                    if( i_range[k] < 16 )
                    {
                        i_thr[k] = 0;
                    }
                }
            }

            /* 2: Index acquisition on 10x10, hence " - i_stride - 1" */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_BinIndex( p_block[k] - i_stride - 1, i_stride,
                                    i_thr[k], i_bin[k] );
            }

            /* 3: adaptive smoothing */
            /* the group starts at (8,8) of a macroblock, so each of the
             * four blocks can have a different QP */
            p_QP = &( p_QP_store[( y >> 4 ) * i_QP_stride + ( x >> 4 )] );

            pp_dering_Filter( p_block[0], i_stride, i_bin[0], p_QP[0] );
            pp_dering_Filter( p_block[1], i_stride, i_bin[1], p_QP[1] );
            pp_dering_Filter( p_block[2], i_stride, i_bin[2],
                              p_QP[i_QP_stride] );
            pp_dering_Filter( p_block[3], i_stride, i_bin[3],
                              p_QP[i_QP_stride + 1] );

            /* NOTE(review): the pointers advance by 8 while x advances by
             * 16 -- unchanged here, confirm it is intended. */
            p_block[0] += 8;
            p_block[1] += 8;
            p_block[2] += 8;
            p_block[3] += 8;
        }
    }
}
/* Dering filter for a chroma plane, one 8x8 block at a time, starting at
 * (8,8).  For each block: min/max give the threshold ( max + min + 1 ) / 2,
 * a 10x10 binary index is built around the block, and the block is
 * smoothed with the quantiser found at ( y >> 5, x >> 5 ).
 *
 * p_plane     : top-left pixel of the plane
 * i_width     : number of columns in the plane
 * i_height    : number of rows in the plane
 * i_stride    : distance in bytes between two rows
 * p_QP_store  : quantiser table
 * i_QP_stride : row pitch of p_QP_store
 *
 * NOTE(review): the ">> 5" quantiser lookup looks unusual for a plane that
 * is half the luma size -- confirm against the producer of p_QP_store.
 */
void E_( pp_dering_C )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int i_row;

    for( i_row = 8; i_row < i_height - 8; i_row += 8 )
    {
        uint8_t *p_blk = p_plane + i_row * i_stride + 8;
        int i_col;

        for( i_col = 8; i_col < i_width - 8; i_col += 8, p_blk += 8 )
        {
            uint32_t a_bin[10];
            int i_lo, i_hi;

            /* 1: threshold from the block's min and max */
            pp_dering_MinMax( p_blk, i_stride, &i_lo, &i_hi );

            /* 2: binary index of the 10x10 window holding the block */
            pp_dering_BinIndex( p_blk - i_stride - 1, i_stride,
                                ( i_hi + i_lo + 1 ) / 2, a_bin );

            /* 3: adaptive smoothing */
            pp_dering_Filter( p_blk, i_stride, a_bin,
                              p_QP_store[( i_row >> 5 ) * i_QP_stride +
                                         ( i_col >> 5 )] );
        }
    }
}
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_mmxext.c: Post Processing plugin MMXEXT
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_mmxext.c,v 1.5 2002/12/18 14:17:10 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <vlc/vlc.h>
/* only use uint8_t, uint32_t .... */
#include "postprocessing.h"
#include "postprocessing_common.h"
/*****************************************************************************
*
* Internal functions common to pp_deblock_V and pp_deblock_H
*
*****************************************************************************/
/*****************************************************************************
* MMX stuff
*****************************************************************************/
/* XXX PP_THR1 need to be defined as ULL */
/* Use same things as in idct but how it work ? */
/* Define a 64-bit constant that is only referenced by name from inline
 * assembly templates.  The __asm__ label fixes the symbol name seen by the
 * assembler.  The attribute is `used` rather than `unused`: `unused` only
 * silences the warning and still lets the optimiser drop an object that no
 * C code references, whereas `used` forces it to be emitted.  The macro
 * name is kept for compatibility. */
#define UNUSED_LONGLONG( foo ) \
    static const unsigned long long foo __asm__ (#foo) __attribute__((used))

/* Place the value b in each of the eight byte lanes of a quadword.  The
 * cast makes every shift a 64-bit shift whatever the type of b is. */
#define PP_MMX_BYTE_X8( b ) \
    ( ( (unsigned long long)(b) << 56 ) | ( (unsigned long long)(b) << 48 ) | \
      ( (unsigned long long)(b) << 40 ) | ( (unsigned long long)(b) << 32 ) | \
      ( (unsigned long long)(b) << 24 ) | ( (unsigned long long)(b) << 16 ) | \
      ( (unsigned long long)(b) <<  8 ) | ( (unsigned long long)(b) ) )

/* Constants used by pp_deblock_isDC_mode, replicated in every byte lane. */
UNUSED_LONGLONG( mmx_thr1 )         = PP_MMX_BYTE_X8( PP_THR1 );
UNUSED_LONGLONG( mmx_127_thr1 )     = PP_MMX_BYTE_X8( 127ULL - PP_THR1 );
UNUSED_LONGLONG( mmx_127_2xthr1_1 ) = PP_MMX_BYTE_X8( 127ULL - PP_2xTHR1 - 1 );

/* Four 16-bit words, lowest first: 2, -5, 5, -2.  Used as pmaddwd
 * multipliers by pp_deblock_DefaultMode. */
UNUSED_LONGLONG( mmx_m2_5_m5_2 )    = 0xfffe0005fffb0002ULL;
/* Find the minimum of the bytes of r and leave it in the low byte of r;
 * t is destroyed. */
#define MMXEXT_GET_PMIN( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pminub " #t ", " #r " \n"
/* Find the maximum of the bytes of r and leave it in the low byte of r;
 * t is destroyed. */
#define MMXEXT_GET_PMAX( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n"
/* Load 8 bytes from s into t, then update the per-byte running minimum m
 * and running maximum M with them. */
#define MMXEXT_GET_LMINMAX( s, m, M, t ) \
"movq " #s ", " #t " \n" \
"pminub " #t ", " #m " \n" \
"pmaxub " #t ", " #M " \n"
/* Some tips for MMX
* |a-b| :
d1 = a - b with unsigned saturate
d2 = b - a with ...
|a-b| = d1 | d2
*/
/****************************************************************************
* pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
****************************************************************************
* Use constant PP_THR1 and PP_THR2 ( PP_2xTHR1 )
*
* Called for each pixel on a block boundary when deblocking,
* so it needs to be fast.
*
****************************************************************************/
/* Decide whether a 10-pixel run is filtered in DC mode or default mode.
 *
 * Counts the adjacent pairs ( p_v[i], p_v[i+1] ), i = 0..8, whose difference
 * is within [-PP_THR1, PP_THR1].  Reference C algorithm:
 *
 *     for( i = 0; i < 9; i++ )
 *         if( ( ( p_v[i] - p_v[i+1] + PP_THR1 ) & 0xffff ) <= PP_2xTHR1 )
 *             i_eq_cnt++;
 *
 * The first eight pairs are evaluated with MMX on wrapping bytes: the
 * difference is biased by 127 - PP_THR1 so that accepted values land in
 * [127 - 2*PP_THR1, 127] and a single signed compare finds them.  The ninth
 * pair does not fit in the 8-byte register and is tested in C.
 *
 * p_v     : 10 consecutive pixels
 * returns : 1 when at least PP_THR2 pairs match (DC mode), otherwise 0
 */
static inline int pp_deblock_isDC_mode( uint8_t *p_v )
{
    unsigned int i_eq_cnt;

    /* pcmpgtb leaves 0xff in every matching byte; psadbw against zero adds
     * the eight bytes as unsigned values, giving 255 * count.  Negating and
     * keeping the low 8 bits recovers count, since -255 == 1 (mod 256).
     * The final "andl $255" is required: without it the result is a huge
     * unsigned value for any count >= 1.
     * "memory": the asm reads p_v[0..8] through a register operand. */
    __asm__ __volatile__ (
        "movq    (%1), %%mm1                 # load v0->v7              \n"
        "pxor    %%mm0, %%mm0                # mm0 = 0                  \n"
        "movq    1(%1), %%mm2                # load v1->v8              \n"
        "psubb   %%mm2, %%mm1                # v[i]-v[i+1]              \n"
        "paddb   mmx_127_thr1, %%mm1         # + 127-THR1 with wrap     \n"
        "pcmpgtb mmx_127_2xthr1_1, %%mm1     # > 127 -2*thr1 - 1        \n"
        "psadbw  %%mm1, %%mm0                # 255 * count              \n"
        "movd    %%mm0, %0                   \n"
        "negl    %0                          \n"
        "andl    $255, %0"
        : "=r"( i_eq_cnt )
        : "r"( p_v )
        : "memory" );

    /* Ninth pair ( v8, v9 ). */
    if( ( ( p_v[8] - p_v[9] + PP_THR1 ) & 0xffff ) <= PP_2xTHR1 )
    {
        i_eq_cnt++;
    }

    return ( i_eq_cnt >= PP_THR2 ) ? 1 : 0;
}
static
inline
int
pp_deblock_isMinMaxOk
(
uint8_t
*
p_v
,
int
i_QP
)
{
int
i_range
;
__asm__
__volatile__
(
"movq 1(%1), %%mm0 # 8 bytes
\n
"
"movq %%mm0, %%mm1
\n
"
MMXEXT_GET_PMIN
(
%%
mm0
,
%%
mm7
)
MMXEXT_GET_PMAX
(
%%
mm1
,
%%
mm7
)
"psubd %%mm0, %%mm1 # max - min
\n
"
"movd %%mm1, %0
\n
"
"andl $255, %0"
:
"=r"
(
i_range
)
:
"r"
(
p_v
)
);
#if 0
int i_max, i_min;
int i;
i_min = i_max = p_v[1];
for( i = 2; i < 9; i++ )
{
if( i_max < p_v[i] ) i_max = p_v[i];
if( i_min > p_v[i] ) i_min = p_v[i];
}
i_range = i_max - i_min;
#endif
return
(
i_range
<
2
*
i_QP
?
1
:
0
);
}
/* Default-mode deblocking of one 10-pixel run; only i_v[4] and i_v[5], the
 * two pixels on either side of the block edge, may be modified.
 *
 *   a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 * ( i_v[5] - i_v[4] )
 *   a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] )
 *   a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] )
 *
 * When 0 < |a3x0| < 8 * i_QP the correction
 *   d = 5 * ( |a3x0| - min( |a3x0|, |a3x1|, |a3x2| ) ) / 8
 * is given the sign opposite to a3x0, clipped to ( i_v[4] - i_v[5] ) / 2,
 * and applied as i_v[4] -= d, i_v[5] += d.  Nothing is changed when that
 * sign disagrees with the sign of ( i_v[4] - i_v[5] ) / 2.
 *
 * i_v      : the 10 pixels across the edge
 * i_stride : unused, kept for the callers
 * i_QP     : quantiser of the block
 */
static inline void pp_deblock_DefaultMode( uint8_t i_v[10],
                                           int i_stride, int i_QP )
{
    int d, i_delta;
    int a3x0, a3x0_, a3x1, a3x2;
    int b_neg;

    (void)i_stride;

    /* a3x0: widen i_v[3..6] to words and multiply-add with (2,-5,5,-2).
     * "memory": the asm reads i_v through a register operand. */
    __asm__ __volatile__ (
        "pxor      %%mm7, %%mm7            # mm7 = 0               \n"
        "movq      mmx_m2_5_m5_2, %%mm6    # mm6 =(2,-5,5,-2)      \n"
        "movd      3(%1), %%mm0            \n"
        "punpcklbw %%mm7, %%mm0            \n"
        "pmaddwd   %%mm6, %%mm0            \n"
        "pshufw    $0xfe, %%mm0, %%mm1     \n"
        "paddd     %%mm1, %%mm0            \n"
        "movd      %%mm0, %0"
        : "=r"( a3x0 )
        : "r"( i_v )
        : "memory" );

    if( a3x0 < 0 )
    {
        b_neg = 1;
        a3x0 = -a3x0;
    }
    else
    {
        b_neg = 0;
    }
    /* From here on a3x0 holds abs( a3x0 ) */

    if( ( a3x0 < 8 * i_QP ) && ( a3x0 != 0 ) )
    {
        /* a3x1 and a3x2.  mm6/mm7 are reloaded here so that this statement
         * does not depend on register contents surviving from the previous
         * asm statement, which the compiler does not guarantee. */
        __asm__ __volatile__ (
            "pxor      %%mm7, %%mm7            # mm7 = 0               \n"
            "movq      mmx_m2_5_m5_2, %%mm6    # mm6 =(2,-5,5,-2)      \n"
            "movd      1(%2), %%mm0            \n"
            "movd      5(%2), %%mm2            \n"
            "punpcklbw %%mm7, %%mm0            \n"
            "punpcklbw %%mm7, %%mm2            \n"
            "pmaddwd   %%mm6, %%mm0            \n"
            "pmaddwd   %%mm6, %%mm2            \n"
            "pshufw    $0xfe, %%mm0, %%mm1     \n"
            "paddd     %%mm1, %%mm0            # mm0 = a3x1            \n"
            "movd      %%mm0, %0               \n"
            "pshufw    $0xfe, %%mm2, %%mm1     \n"
            "paddd     %%mm1, %%mm2            # mm2 = a3x2            \n"
            "movd      %%mm2, %1               \n"
            : "=r"( a3x1 ), "=r"( a3x2 )
            : "r"( i_v )
            : "memory" );

        if( a3x1 < 0 ) a3x1 = -a3x1; /* abs( a3x1 ) */
        if( a3x2 < 0 ) a3x2 = -a3x2; /* abs( a3x2 ) */

        a3x0_ = PP_MIN3( a3x0, a3x1, a3x2 );

        d = 5 * ( a3x0 - a3x0_ ) / 8; /* never negative */

        i_delta = ( i_v[4] - i_v[5] ) / 2;

        /* clip into [0, i_delta] or [i_delta, 0] */
        if( i_delta < 0 )
        {
            if( !b_neg ) /* the true d has sgn(d) = - sgn( a3x0 ) */
            {
                d = -d;
                if( d < i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
        else
        {
            if( b_neg )
            {
                if( d > i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
    }
}
/* DC-mode deblocking: smooth the eight pixels p_v[1..8] of a 10-pixel run.
 *
 * p0 is p_v[0] when |p_v[1] - p_v[0]| < i_QP, otherwise p_v[1];
 * p9 is p_v[9] when |p_v[8] - p_v[9]| < i_QP, otherwise p_v[8].
 * With v[] the unmodified input, the asm evaluates:
 *
 *   p_v[1] = ( 6*p0 + 4*v[1] + 2*(v[2]+v[3]) + v[4] + v[5] ) >> 4
 *   p_v[2] = ( 4*p0 + 2*v[1] + 4*v[2] + 2*(v[3]+v[4]) + v[5] + v[6] ) >> 4
 *   p_v[3] = ( 2*p0 + 2*(v[1]+v[2]) + 4*v[3] + 2*(v[4]+v[5])
 *              + v[6] + v[7] ) >> 4
 *   p_v[4] = ( p0 + v[1] + 2*(v[2]+v[3]) + 4*v[4] + 2*(v[5]+v[6])
 *              + v[7] + v[8] ) >> 4
 *   p_v[5] = ( v[1] + v[2] + 2*(v[3]+v[4]) + 4*v[5] + 2*(v[6]+v[7])
 *              + v[8] + p9 ) >> 4
 *   p_v[6] = ( v[2] + v[3] + 2*(v[4]+v[5]) + 4*v[6] + 2*(v[7]+v[8])
 *              + 2*p9 ) >> 4
 *   p_v[7] = ( v[3] + v[4] + 2*(v[5]+v[6]) + 4*v[7] + 2*v[8] + 4*p9 ) >> 4
 *   p_v[8] = ( v[4] + v[5] + 2*(v[6]+v[7]) + 4*v[8] + 6*p9 ) >> 4
 *
 * Register use: mm0 = the 8 input pixels, mm1 = scratch (4 words),
 * mm2 = accumulator for p_v[1..4], mm3 = accumulator for p_v[5..8],
 * mm5 = p0, mm6 = p9 << 48, mm7 = 0.  pshufw $0x90 repeats the lowest word
 * and moves the others up; pshufw $0xf9 repeats the highest word and moves
 * the others down.
 */
static inline void pp_deblock_DCMode( uint8_t *p_v, /* = int i_v[10] */
                                      int i_QP )
{
    const int i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
    const int i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];

    __asm__ __volatile__ (
        "pxor      %%mm7, %%mm7    \n"
        "movq      1(%0), %%mm0    # get 8 pix                     \n"
        "                          # unpack into mm1               \n"
        "movq      %%mm0, %%mm1    \n"
        "punpcklbw %%mm7, %%mm1    \n"
        "                          # get p_0 and i_p9              \n"
        "movd      %1, %%mm5       \n"
        "movd      %2, %%mm6       \n"
        "psllq     $48, %%mm6      \n"

        "movq      %%mm1, %%mm3    # p_v[5-8] = v[1-4] !!          \n"
        "movq      %%mm1, %%mm2    \n"
        "psllw     $2, %%mm2       # p_v[1-4] = 4*v[1-4]           \n"

        "psllq     $16, %%mm1      \n"
        "por       %%mm5, %%mm1    # mm1 =( p0, v1, v2 ,v3)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm2    \n"

        "pshufw    $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, v1, v2)      \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm2    \n"

        "pshufw    $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, p0, v2)      \n"
        "paddw     %%mm1, %%mm2    \n"

        "pshufw    $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, p0, p0)      \n"
        "paddw     %%mm1, %%mm2    \n"
        "                          # Now last part a little borring \n"
        "                          # last part for mm2, beginig for mm3 \n"
        "movq      %%mm0, %%mm1    \n"
        "psrlq     $8, %%mm1       \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v2, v3, v4, v5 )       \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm3    \n"

        "movq      %%mm0, %%mm1    \n"
        "psrlq     $16, %%mm1      \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v3, v4, v5, v6 )       \n"
        "psllw     $1, %%mm1       \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm3    \n"

        "movq      %%mm0, %%mm1    \n"
        "psrlq     $24, %%mm1      \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v4, v5, v6, v7)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "paddw     %%mm1, %%mm3    \n"
        "paddw     %%mm1, %%mm3    \n"

        "movq      %%mm0, %%mm1    \n"
        "psrlq     $32, %%mm1      \n"
        "punpcklbw %%mm7, %%mm1    # mm1 =( v5, v6, v7, v8)        \n"
        "paddw     %%mm1, %%mm2    \n"
        "psllw     $2, %%mm1       \n"
        "paddw     %%mm1, %%mm3    \n"
        "                          # Now last part for last 4 pix  \n"
        "movq      %%mm0, %%mm1    \n"
        "punpckhbw %%mm7, %%mm1    # mm1 = ( v5, v6, v7, v8)       \n"

        "psrlq     $16, %%mm1      \n"
        "por       %%mm6, %%mm1    # mm1 =( v6, v7, v8, p9 )       \n"
        "paddw     %%mm1, %%mm3    \n"
        "paddw     %%mm1, %%mm3    \n"

        "pshufw    $0xf9,%%mm1,%%mm1 # mm1 =( v7, v8, p9, p9)      \n"
        "paddw     %%mm1, %%mm3    \n"
        "paddw     %%mm1, %%mm3    \n"

        "pshufw    $0xf9,%%mm1,%%mm1 # mm1 =( v8, p9, p9, p9)      \n"
        "paddw     %%mm1, %%mm3    \n"

        "pshufw    $0xf9,%%mm1,%%mm1 # mm1 =( p9, p9, p9, p9)      \n"
        "paddw     %%mm1, %%mm3    \n"

        "psrlw     $4, %%mm2       \n"
        "psrlw     $4, %%mm3       \n"
        "packuswb  %%mm3, %%mm2    \n"
        "movq      %%mm2, 1(%0)    \n"
        :
        : "r"( p_v ), "r"( i_p0 ), "r"( i_p9 )
        : "memory" );
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ---------- filter Vertical lines so follow horizontal edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/**
 * Deblocking pass that filters vertically, i.e. across horizontal block edges.
 *
 * Edges are taken at line 8 and then every 8 lines. For each edge and each
 * column, the 10 pixels surrounding the edge (5 above, 5 below) are gathered
 * into a contiguous buffer, filtered in DC or default mode, and entries 1..8
 * of the buffer are stored back into the plane.
 *
 * \param p_plane      plane filtered in place
 * \param i_width      number of columns to process
 * \param i_height     number of lines of the plane
 * \param i_stride     distance in bytes between two lines of p_plane
 * \param p_QP_store   QP table, indexed with pixel coordinates shifted right
 * \param i_QP_stride  number of entries between two rows of p_QP_store
 * \param b_chroma     selects the coordinate shift: 5 when non-zero, else 4
 */
void E_( pp_deblock_V )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    /* pixel coordinate >> i_shift gives the QP table coordinate */
    const int i_shift = b_chroma ? 5 : 4;
    uint8_t   i_col[10];
    int       i_edge, i_x, k;

    for( i_edge = 8; i_edge < i_height - 4; i_edge += 8 )
    {
        /* the 10 pixel window starts 5 lines above the edge */
        uint8_t  *p_top    = p_plane + ( i_edge - 5 ) * i_stride;
        const int i_QP_row = ( i_edge >> i_shift ) * i_QP_stride;

        for( i_x = 0; i_x < i_width; i_x++ )
        {
            uint8_t *p_pix = p_top + i_x;
            int      i_QP;

            /* gather the column so the filters can work without a stride */
            for( k = 0; k < 10; k++ )
            {
                i_col[k] = p_pix[k * i_stride];
            }

            /* XXX QP is the one of pixel 5 of the window */
            i_QP = p_QP_store[i_QP_row + ( i_x >> i_shift )];

            if( !pp_deblock_isDC_mode( i_col ) )
            {
                pp_deblock_DefaultMode( i_col, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( i_col, i_QP ) )
            {
                pp_deblock_DCMode( i_col, i_QP );
            }

            /* scatter back; only entries 1..8 may have been modified */
            for( k = 1; k < 9; k++ )
            {
                p_pix[k * i_stride] = i_col[k];
            }
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* --------- filter Horizontal lines so follow vertical edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/**
 * Deblocking pass that filters horizontally, i.e. across vertical block edges.
 *
 * On every line, edges are taken at column 8 and then every 8 columns. The
 * 10 pixel window starting 5 pixels before the edge is filtered in place,
 * in DC or default mode.
 *
 * \param p_plane      plane filtered in place
 * \param i_width      number of columns of the plane
 * \param i_height     number of lines to process
 * \param i_stride     distance in bytes between two lines of p_plane
 * \param p_QP_store   QP table, indexed with pixel coordinates shifted right
 * \param i_QP_stride  number of entries between two rows of p_QP_store
 * \param b_chroma     selects the coordinate shift: 5 when non-zero, else 4
 */
void E_( pp_deblock_H )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    /* pixel coordinate >> i_shift gives the QP table coordinate */
    const int i_shift = b_chroma ? 5 : 4;
    int       i_line, i_edge;

    for( i_line = 0; i_line < i_height; i_line++ )
    {
        uint8_t  *p_row    = p_plane + i_line * i_stride;
        const int i_QP_row = ( i_line >> i_shift ) * i_QP_stride;

        for( i_edge = 8; i_edge < i_width - 4; i_edge += 8 )
        {
            /* window begins 5 pixels before the block boundary */
            uint8_t  *p_win = p_row + ( i_edge - 5 );
            /* XXX QP is the one of pixel 5 of the window */
            const int i_QP  = p_QP_store[i_QP_row + ( i_edge >> i_shift )];

            if( !pp_deblock_isDC_mode( p_win ) )
            {
                pp_deblock_DefaultMode( p_win, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( p_win, i_QP ) )
            {
                pp_deblock_DCMode( p_win, i_QP );
            }
        }
    }
}
/*****************************************************************************
*
* Internal functions common to pp_dering_Y and pp_dering_C
*
*****************************************************************************/
/**
 * Compute the minimum and maximum pixel value of an 8x8 block (MMXEXT).
 *
 * Eight bytes are read from each of the 8 rows starting at p_block. The
 * disabled C code at the end of the function is the scalar reference of
 * what the assembly computes.
 *
 * \param p_block   top-left pixel of the 8x8 block
 * \param i_stride  distance in bytes between two rows
 * \param pi_min    receives the minimum (masked to 0..255)
 * \param pi_max    receives the maximum (masked to 0..255)
 *
 * NOTE(review): MMXEXT_GET_LMINMAX, MMXEXT_GET_PMIN and MMXEXT_GET_PMAX are
 * defined outside this function. Presumably the first folds one more row
 * into a running per-column min (mm0) and max (mm1), and the other two
 * reduce the packed result to a single byte, using mm7 as scratch - confirm
 * against the macro definitions.
 * NOTE(review): the address arithmetic goes through the 32-bit %eax
 * register, which looks like it assumes a 32-bit x86 target - confirm.
 */
static inline void pp_dering_MinMax( uint8_t *p_block,
                                     int i_stride,
                                     int *pi_min,
                                     int *pi_max )
{
    /* First pass folds the 8 rows together, second pass reduces the
       packed result to the global min/max */
    __asm__ __volatile__(
    /* eax = address of row 1 */
    "leal (%2,%3), %%eax \n"
    /* row 0 seeds both accumulators */
    "movq (%2), %%mm0 #load line \n"
    "movq %%mm0, %%mm1 \n"
    /* rows 1, 2, 3 (relative to eax) and row 4 (relative to p_block) */
    MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
    MMXEXT_GET_LMINMAX( (%%eax,%3), %%mm0, %%mm1, %%mm7 )
    MMXEXT_GET_LMINMAX( (%%eax,%3,2), %%mm0, %%mm1, %%mm7 )
    MMXEXT_GET_LMINMAX( (%2,%3,4), %%mm0, %%mm1, %%mm7 )
    /* eax = address of row 5 */
    "leal (%%eax,%3,4), %%eax \n"
    /* rows 5, 6, 7 */
    MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
    MMXEXT_GET_LMINMAX( (%%eax,%3), %%mm0, %%mm1, %%mm7 )
    MMXEXT_GET_LMINMAX( (%%eax,%3,2), %%mm0, %%mm1, %%mm7 )
    MMXEXT_GET_PMIN( %%mm0, %%mm7 )
    MMXEXT_GET_PMAX( %%mm1, %%mm7 )
    /* low byte of mm0 -> *pi_min */
    "movd %%mm0, %%eax \n"
    "andl $255, %%eax \n"
    "movl %%eax, (%0) \n"
    /* low byte of mm1 -> *pi_max */
    "movd %%mm1, %%eax \n"
    "andl $255, %%eax \n"
    "movl %%eax, (%1) \n"
    : : "r"(pi_min), "r"(pi_max), "r"(p_block), "r"(i_stride)
    : "%eax", "memory" );
#if 0
i_min = 255; i_max = 0;
for( y = 0; y < 8; y++ )
{
for( x = 0; x < 8; x++ )
{
if( i_min > p_block[x] ) i_min = p_block[x];
if( i_max < p_block[x] ) i_max = p_block[x];
}
p_block += i_stride;
}
*pi_min = i_min;
*pi_max = i_max;
#endif
}
/**
 * Build the binary index of a 10x10 pixel window (MMXEXT).
 *
 * For each of the 10 rows a bitmap is computed where bit x (0..9) is set
 * when p_block[x] <= i_thr; its complement is placed in the upper 16 bits so
 * that runs of pixels above the threshold are detected too. The value stored
 * in p_bin[] keeps only the bits whose two horizontal neighbours are set as
 * well, i.e. it marks the centre of three adjacent pixels lying on the same
 * side of the threshold.
 *
 * \param p_block   top-left pixel of the 10x10 window
 * \param i_stride  distance in bytes between two rows
 * \param i_thr     threshold; the SIMD compare only broadcasts its low byte,
 *                  so it is expected to be in 0..255
 * \param p_bin     output array, one entry per row (10 entries)
 */
static inline void pp_dering_BinIndex( uint8_t *p_block, int i_stride,
                                       int i_thr, uint32_t *p_bin )
{
    int y;
    uint32_t i_bin;

    /* first create mm7 with all bytes set to thr and mm6 = 0 */
    __asm__ __volatile__(
    "movl %0, %%eax \n"
    "movb %%al, %%ah \n"
    "movd %%eax, %%mm7 \n"
    "pshufw $0x00, %%mm7, %%mm7 \n"
    "pxor %%mm6, %%mm6 \n"
    : : "r"(i_thr) : "%eax" );

    for( y = 0; y < 10; y++ )
    {
        /* The asm dereferences p_block, which is not a memory operand:
           the "memory" clobber keeps pending stores to the plane ordered
           before the load. */
        __asm__ __volatile__(
        "movq (%1), %%mm0 \n"
        "psubusb %%mm7, %%mm0 \n"  /* sat makes that x <= thr --> 0 */
        "pcmpeqb %%mm6, %%mm0 \n"  /* p_block <= i_thr ? -1 : 0 */
        "pmovmskb %%mm0, %0 \n"    /* i_bin = msb of each byte */
        : "=r"(i_bin) : "r"(p_block) : "memory" );

        /* The SIMD part covers pixels 0..7, now the last 2 tests */
        if( p_block[8] <= i_thr ) i_bin |= 1 << 8;
        if( p_block[9] <= i_thr ) i_bin |= 1 << 9;

        i_bin |= (~i_bin) << 16;  /* to detect three 1 or three 0 */

        /* Bitwise AND: a bit survives only when both neighbours are set.
           (A logical && here would collapse the whole bitmap to 0 or 1.) */
        *p_bin = ( i_bin >> 1 ) & i_bin & ( i_bin << 1 );

        p_block += i_stride;
        p_bin++;
    }
}
/**
 * Adaptive smoothing step of the deringing filter, for one 8x8 block.
 *
 * Pixels flagged in the binary index are replaced by the 3x3 weighted mean
 *     1 2 1
 *     2 4 2   ( + 8 ) >> 4
 *     1 2 1
 * of the unfiltered block, the others are kept. The result is then clamped
 * to [original - i_QP/2, original + i_QP/2] (saturated to 0..255) and
 * written back to the block.
 *
 * \param p_block   top-left pixel of the 8x8 block; the one pixel border
 *                  around it is read as well
 * \param i_stride  distance in bytes between two rows
 * \param p_bin     binary index of the surrounding 10x10 window (10 entries)
 * \param i_QP      quantiser of the block
 */
static inline void pp_dering_Filter( uint8_t *p_block, int i_stride,
                                     uint32_t *p_bin, int i_QP )
{
    uint8_t  i_flt[8][8];
    uint8_t *p_row = p_block;
    uint8_t *p_dst = p_block;
    int      i_row, i_col;

    /* First pass: compute every filtered value from untouched pixels */
    for( i_row = 0; i_row < 8; i_row++, p_row += i_stride )
    {
        const uint8_t *p_up = p_row - i_stride;
        const uint8_t *p_dn = p_row + i_stride;
        /* a pixel qualifies when its 3 index rows agree... */
        uint32_t i_mask = p_bin[i_row] & p_bin[i_row + 1] & p_bin[i_row + 2];

        /* ...either on the "1" map or on the complemented one */
        i_mask |= i_mask >> 16;

        for( i_col = 0; i_col < 8; i_col++ )
        {
            /* index has 10 columns, block column c is index bit c + 1 */
            if( ( i_mask >> ( i_col + 1 ) ) & 1 )
            {
                const int i_sum =
                      p_up[i_col - 1] + p_up[i_col + 1]
                    + p_dn[i_col - 1] + p_dn[i_col + 1]
                    + 2 * ( p_up[i_col] + p_row[i_col - 1]
                          + p_row[i_col + 1] + p_dn[i_col] )
                    + 4 * p_row[i_col];

                i_flt[i_row][i_col] = ( i_sum + 8 ) >> 4;
            }
            else
            {
                i_flt[i_row][i_col] = p_row[i_col];
            }
        }
    }

    /* Create mm7 with all bytes set to QP/2 */
    __asm__ __volatile__(
    "movl %0, %%eax \n"
    "shrl $1, %%eax \n"           /* i_QP/2 */
    "movb %%al, %%ah \n"
    "movd %%eax, %%mm7 \n"
    "pshufw $0x00, %%mm7, %%mm7 \n"
    : : "r"(i_QP) : "%eax" );

    /* Second pass: clamp around the original pixels and store */
    for( i_row = 0; i_row < 8; i_row++, p_dst += i_stride )
    {
        __asm__ __volatile__(
        "movq (%0), %%mm0 \n"     /* mm0 = filtered row */
        "movq (%1), %%mm1 \n"     /* mm1 = original row */
        "movq %%mm1, %%mm2 \n"
        "psubusb %%mm7, %%mm1 \n" /* mm1 = original - QP/2, floor 0 */
        "paddusb %%mm7, %%mm2 \n" /* mm2 = original + QP/2, ceil 255 */
        "pmaxub %%mm1, %%mm0 \n"  /* lower bound */
        "pminub %%mm2, %%mm0 \n"  /* upper bound */
        "movq %%mm0, (%1) \n"
        : : "r"(i_flt[i_row]), "r"(p_dst) : "memory" );
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ----------------- Dering filter on Y and C blocks ----------------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/**
 * Deringing filter for a luma plane.
 *
 * The plane is walked in groups of four 8x8 blocks laid out as
 *     +-+-+
 *     |0|1|
 *     +-+-+
 *     |2|3|
 *     +-+-+
 * with the first group at (8,8) and a step of 16 in both directions.
 * For each group:
 *   1. min/max of every block give its range and threshold, and the
 *      thresholds of low-range blocks are rearranged;
 *   2. the binary index of the 10x10 window around every block is built;
 *   3. every block is smoothed using its own QP.
 * All four indexes are built before any block is filtered, since the
 * windows overlap the neighbouring blocks.
 *
 * \param p_plane      plane filtered in place
 * \param i_width      width of the plane in pixels
 * \param i_height     height of the plane in lines
 * \param i_stride     distance in bytes between two lines of p_plane
 * \param p_QP_store   QP table, one entry per 16x16 pixels
 * \param i_QP_stride  number of entries between two rows of p_QP_store
 */
void E_( pp_dering_Y )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int x, y, k;
    int i_max[4], i_min[4], i_range[4];
    int i_thr[4];
    int i_max_range, i_kmax;
    uint32_t i_bin[4][10];
    uint8_t *p_block[4];
    QT_STORE_T *p_QP;

    /* We process 4 blocks/loop */
    for( y = 8; y < i_height - 8; y += 16 )
    {
        for( x = 8; x < i_width - 8; x += 16 )
        {
            /* Derive the block addresses from (x,y) so they always match
               the coordinates used for the QP lookup below: x advances by
               16, so advancing the pointers by 8 would make them drift. */
            p_block[0] = p_plane + y * i_stride + x;
            p_block[1] = p_block[0] + 8;
            p_block[2] = p_block[0] + 8 * i_stride;
            p_block[3] = p_block[2] + 8;

            /* 1: Calculate threshold */
            /* Calculate max/min for each block */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_MinMax( p_block[k], i_stride,
                                  &i_min[k], &i_max[k] );
            }

            /* Calculate range, max_range and thr */
            i_max_range = 0;
            i_kmax = 0;
            for( k = 0; k < 4; k++ )
            {
                i_range[k] = i_max[k] - i_min[k];
                i_thr[k] = ( i_max[k] + i_min[k] + 1 ) / 2;
                /* i_kmax is the block with the largest dynamic range,
                   not the one with the brightest pixel */
                if( i_max_range < i_range[k] )
                {
                    i_max_range = i_range[k];
                    i_kmax = k;
                }
            }

            /* Now rearrange thr. The arrays hold 4 entries, indexed 0..3 */
            for( k = 0; k < 4; k++ )
            {
                if( i_range[k] < 16 )
                {
                    i_thr[k] = 0;
                }
                else if( i_max_range > 64 && i_range[k] < 32 )
                {
                    i_thr[k] = i_thr[i_kmax];
                }
            }

            /* 2: Index acquisition 10x10 ! so " -i_stride - 1" */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_BinIndex( p_block[k] - i_stride - 1, i_stride,
                                    i_thr[k], i_bin[k] );
            }

            /* 3: adaptive smoothing */
            /* since we begin at (8,8) QP can be different for each block */
            p_QP = &( p_QP_store[( y >> 4 ) * i_QP_stride + ( x >> 4 )] );

            pp_dering_Filter( p_block[0], i_stride,
                              i_bin[0], p_QP[0] );
            pp_dering_Filter( p_block[1], i_stride,
                              i_bin[1], p_QP[1] );
            pp_dering_Filter( p_block[2], i_stride,
                              i_bin[2], p_QP[i_QP_stride] );
            pp_dering_Filter( p_block[3], i_stride,
                              i_bin[3], p_QP[i_QP_stride + 1] );
        }
    }
}
/**
 * Deringing filter for a chroma plane.
 *
 * Every 8x8 block whose top-left corner lies at a multiple of 8, from (8,8)
 * up to (but excluding) i_width - 8 / i_height - 8, is processed on its own:
 * threshold from its min/max, binary index of the surrounding 10x10 window,
 * then adaptive smoothing.
 *
 * \param p_plane      plane filtered in place
 * \param i_width      width of the plane in pixels
 * \param i_height     height of the plane in lines
 * \param i_stride     distance in bytes between two lines of p_plane
 * \param p_QP_store   QP table, indexed with pixel coordinates >> 5
 * \param i_QP_stride  number of entries between two rows of p_QP_store
 */
void E_( pp_dering_C )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    uint32_t i_index[10];
    int      i_line, i_col;

    for( i_line = 8; i_line < i_height - 8; i_line += 8 )
    {
        uint8_t *p_row = p_plane + i_line * i_stride;

        for( i_col = 8; i_col < i_width - 8; i_col += 8 )
        {
            uint8_t *p_cur = p_row + i_col;
            int      i_lo, i_hi, i_mid;

            /* 1: threshold is the rounded mean of the block extrema */
            pp_dering_MinMax( p_cur, i_stride, &i_lo, &i_hi );
            i_mid = ( i_hi + i_lo + 1 ) / 2;

            /* 2: index acquisition on the 10x10 window that contains
                  the 8x8 block */
            pp_dering_BinIndex( p_cur - i_stride - 1, i_stride,
                                i_mid, i_index );

            /* 3: adaptive smoothing */
            pp_dering_Filter( p_cur, i_stride, i_index,
                              p_QP_store[( i_line >> 5 ) * i_QP_stride
                                         + ( i_col >> 5 )] );
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment