Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
313acca3
Commit
313acca3
authored
Oct 22, 2004
by
Gildas Bazin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* modules/codec/ffmpeg/postprocessing: removed deprecated postprocessing code.
parent
b15eda88
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
0 additions
and
2783 deletions
+0
-2783
configure.ac
configure.ac
+0
-1
modules/codec/ffmpeg/postprocessing/Modules.am
modules/codec/ffmpeg/postprocessing/Modules.am
+0
-20
modules/codec/ffmpeg/postprocessing/postprocessing.c
modules/codec/ffmpeg/postprocessing/postprocessing.c
+0
-223
modules/codec/ffmpeg/postprocessing/postprocessing.h
modules/codec/ffmpeg/postprocessing/postprocessing.h
+0
-56
modules/codec/ffmpeg/postprocessing/postprocessing_c.c
modules/codec/ffmpeg/postprocessing/postprocessing_c.c
+0
-625
modules/codec/ffmpeg/postprocessing/postprocessing_common.h
modules/codec/ffmpeg/postprocessing/postprocessing_common.h
+0
-52
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
+0
-889
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
+0
-917
No files found.
configure.ac
View file @
313acca3
...
@@ -4138,7 +4138,6 @@ AC_CONFIG_FILES([
...
@@ -4138,7 +4138,6 @@ AC_CONFIG_FILES([
modules/codec/cmml/Makefile
modules/codec/cmml/Makefile
modules/codec/dmo/Makefile
modules/codec/dmo/Makefile
modules/codec/ffmpeg/Makefile
modules/codec/ffmpeg/Makefile
modules/codec/ffmpeg/postprocessing/Makefile
modules/codec/ogt/Makefile
modules/codec/ogt/Makefile
modules/codec/spudec/Makefile
modules/codec/spudec/Makefile
modules/control/Makefile
modules/control/Makefile
...
...
modules/codec/ffmpeg/postprocessing/Modules.am
deleted
100644 → 0
View file @
b15eda88
SOURCES_postprocessing_c = \
postprocessing.c \
postprocessing_c.c \
$(NULL)
SOURCES_postprocessing_mmx = \
postprocessing.c \
postprocessing_mmx.c \
$(NULL)
SOURCES_postprocessing_mmxext = \
postprocessing.c \
postprocessing_mmxext.c \
$(NULL)
noinst_HEADERS += \
postprocessing.h \
postprocessing_common.h \
$(NULL)
modules/codec/ffmpeg/postprocessing/postprocessing.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing.c
*****************************************************************************
* Copyright (C) 1999-2001 VideoLAN
* $Id$
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#include <stdlib.h>
#include <string.h>
#include <vlc/vlc.h>
#include <vlc/vout.h>
#include "postprocessing.h"
#include "postprocessing_common.h"
/* Module activation callback; it is handed to set_callbacks() below. */
static int Open( vlc_object_t *p_this );

/* Translate a quality level (clamped to 0..6) and the autolevel switch
 * into a PP_* flag mask. */
static uint32_t pp_getmode( int i_quality, int b_autolevel );

/* Apply the filters selected by i_mode to a picture; returns a PP_ERR_*
 * code. */
static int pp_postprocess( picture_t *p_pic, QT_STORE_T *p_QP_store,
                           unsigned int i_QP_stride, unsigned int i_mode );
/*****************************************************************************
* Module descriptor
*****************************************************************************/
vlc_module_begin
();
#ifdef MODULE_NAME_IS_postprocessing_c
set_description
(
_
(
"C post processing"
)
);
set_capability
(
"postprocessing"
,
50
);
add_shortcut
(
"c"
);
#elif defined( MODULE_NAME_IS_postprocessing_mmx )
set_description
(
_
(
"MMX post processing"
)
);
set_capability
(
"postprocessing"
,
100
);
add_requirement
(
MMX
);
add_shortcut
(
"mmx"
);
#elif defined( MODULE_NAME_IS_postprocessing_mmxext )
set_description
(
_
(
"MMX EXT post processing"
)
);
set_capability
(
"postprocessing"
,
150
);
add_requirement
(
MMXEXT
);
add_shortcut
(
"mmxext"
);
add_shortcut
(
"mmx2"
);
#endif
set_callbacks
(
Open
,
NULL
);
vlc_module_end
();
/*****************************************************************************
* Module initializer
*****************************************************************************/
/*
 * Open: fill in the function pointers of the postprocessing object.
 *
 * p_this is reinterpreted as a postprocessing_t; its pf_getmode and
 * pf_postprocess members are pointed at this file's implementations.
 * Always returns VLC_SUCCESS.
 */
static int Open( vlc_object_t *p_this )
{
    postprocessing_t *p_filter = (postprocessing_t *)p_this;

    p_filter->pf_postprocess = pp_postprocess;
    p_filter->pf_getmode     = pp_getmode;

    return VLC_SUCCESS;
}
/*
 * pp_getmode: build the PP_* flag mask for a quality level.
 *
 * i_quality   is clamped to [0, 6]; each level enables one more filter than
 *             the previous one (0 enables none).
 * b_autolevel when non-zero, PP_AUTOLEVEL is OR-ed into the result.
 *
 * Returns the resulting flag mask.
 */
static uint32_t pp_getmode( int i_quality, int b_autolevel )
{
    /* Filter set enabled at each quality level */
    static const uint32_t pi_level_flags[7] =
    {
        0,
        PP_DEBLOCK_Y_H,
        PP_DEBLOCK_Y_H | PP_DEBLOCK_Y_V,
        PP_DEBLOCK_Y_H | PP_DEBLOCK_Y_V | PP_DEBLOCK_C_H,
        PP_DEBLOCK_Y_H | PP_DEBLOCK_Y_V | PP_DEBLOCK_C_H | PP_DEBLOCK_C_V,
        PP_DEBLOCK_Y_H | PP_DEBLOCK_Y_V | PP_DEBLOCK_C_H | PP_DEBLOCK_C_V
            | PP_DERING_Y,
        PP_DEBLOCK_Y_H | PP_DEBLOCK_Y_V | PP_DEBLOCK_C_H | PP_DEBLOCK_C_V
            | PP_DERING_Y | PP_DERING_C
    };
    uint32_t i_flags;

    if( i_quality < 0 )
    {
        i_quality = 0;
    }
    else if( i_quality > 6 )
    {
        i_quality = 6;
    }

    i_flags = pi_level_flags[i_quality];
    if( b_autolevel )
    {
        i_flags |= PP_AUTOLEVEL;
    }

    return i_flags;
}
/*****************************************************************************
* pp_postprocess : make post-filter as defined in MPEG4-ISO
*****************************************************************************
*****************************************************************************/
/*
 * pp_postprocess: run the filters selected by i_mode on p_pic, in place.
 *
 * p_pic        picture to filter; only the I420 and YV12 chromas are accepted
 * p_QP_store   quantizer table, must not be NULL
 * i_QP_stride  row length of p_QP_store, must be at least (width >> 4)
 * i_mode       OR-ed PP_DEBLOCK_* / PP_DERING_* flags
 *
 * Returns PP_ERR_INVALID_PICTURE, PP_ERR_INVALID_QP or PP_ERR_OK.
 * Filters run in a fixed order: vertical then horizontal deblocking on
 * luma, the same on both chroma planes, then deringing on luma and chroma.
 */
static int pp_postprocess( picture_t *p_pic,
                           QT_STORE_T *p_QP_store,
                           unsigned int i_QP_stride,
                           unsigned int i_mode )
{
    /* Sanity checks (picture dimensions are not checked) */
    if( p_pic->p_heap->i_chroma != VLC_FOURCC( 'I', '4', '2', '0' ) &&
        p_pic->p_heap->i_chroma != VLC_FOURCC( 'Y', 'V', '1', '2' ) )
    {
        return PP_ERR_INVALID_PICTURE;
    }
    if( !p_QP_store )
    {
        return PP_ERR_INVALID_QP;
    }
    if( i_QP_stride < p_pic->p_heap->i_width >> 4 )
    {
        return PP_ERR_INVALID_QP;
    }

    /* Deblocking, luminance: vertical pass first, then horizontal */
    if( i_mode & PP_DEBLOCK_Y_V )
    {
        E_( pp_deblock_V )( p_pic->Y_PIXELS,
                            p_pic->p_heap->i_width, p_pic->p_heap->i_height,
                            p_pic->Y_PITCH, p_QP_store, i_QP_stride, 0 );
    }
    if( i_mode & PP_DEBLOCK_Y_H )
    {
        E_( pp_deblock_H )( p_pic->Y_PIXELS,
                            p_pic->p_heap->i_width, p_pic->p_heap->i_height,
                            p_pic->Y_PITCH, p_QP_store, i_QP_stride, 0 );
    }

    /* Deblocking, chrominance: both planes use halved dimensions */
    if( i_mode & PP_DEBLOCK_C_V )
    {
        E_( pp_deblock_V )( p_pic->U_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->U_PITCH, p_QP_store, i_QP_stride, 1 );
        E_( pp_deblock_V )( p_pic->V_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->V_PITCH, p_QP_store, i_QP_stride, 1 );
    }
    if( i_mode & PP_DEBLOCK_C_H )
    {
        E_( pp_deblock_H )( p_pic->U_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->U_PITCH, p_QP_store, i_QP_stride, 1 );
        E_( pp_deblock_H )( p_pic->V_PIXELS,
                            p_pic->p_heap->i_width >> 1,
                            p_pic->p_heap->i_height >> 1,
                            p_pic->V_PITCH, p_QP_store, i_QP_stride, 1 );
    }

    /* Deringing comes after deblocking */
    /* TODO check for min size */
    if( i_mode & PP_DERING_Y )
    {
        E_( pp_dering_Y )( p_pic->Y_PIXELS,
                           p_pic->p_heap->i_width, p_pic->p_heap->i_height,
                           p_pic->Y_PITCH, p_QP_store, i_QP_stride );
    }
    if( i_mode & PP_DERING_C )
    {
        E_( pp_dering_C )( p_pic->U_PIXELS,
                           p_pic->p_heap->i_width >> 1,
                           p_pic->p_heap->i_height >> 1,
                           p_pic->U_PITCH, p_QP_store, i_QP_stride );
        E_( pp_dering_C )( p_pic->V_PIXELS,
                           p_pic->p_heap->i_width >> 1,
                           p_pic->p_heap->i_height >> 1,
                           p_pic->V_PITCH, p_QP_store, i_QP_stride );
    }

#if defined( MODULE_NAME_IS_postprocessing_mmx )||defined( MODULE_NAME_IS_postprocessing_mmxext )
    /* MMX was used: go back to a safe FPU state */
    __asm__ __volatile__( "emms" );
#endif

    return PP_ERR_OK;
}
modules/codec/ffmpeg/postprocessing/postprocessing.h
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing.h
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing.h,v 1.3 2003/10/25 00:49:14 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#ifndef VLC_FFMPEG_POSTPROCESSING_H
#define VLC_FFMPEG_POSTPROCESSING_H

/* Element type of the quantizer (QP) table handed to the filters */
#define QT_STORE_T int8_t

/* Filter flags, OR-ed together to build i_mode.
 * Y = luminance, C = chrominance, H / V = horizontal / vertical pass. */
#define PP_DEBLOCK_Y_H 0x00000001
#define PP_DEBLOCK_Y_V 0x00000002
#define PP_DEBLOCK_C_H 0x00000004
#define PP_DEBLOCK_C_V 0x00000008
#define PP_DERING_Y 0x00000010
#define PP_DERING_C 0x00000020
#define PP_AUTOLEVEL 0x80000000

/* Error codes, not really used */
#define PP_ERR_OK 0              /* no problem */
#define PP_ERR_INVALID_PICTURE 1 /* wrong picture size or chroma */
#define PP_ERR_INVALID_QP 2      /* a valid QP table is needed to postprocess */
#define PP_ERR_UNKNOWN 255

/*
 * Postprocessing object: a module fills in the two function pointers
 * when it is opened.
 */
typedef struct postprocessing_s
{
    VLC_COMMON_MEMBERS

    module_t *p_module;

    /* Build an i_mode flag mask from a quality level and autolevel switch */
    uint32_t (*pf_getmode)( int i_quality, int b_autolevel );

    /* Filter p_pic according to i_mode; returns a PP_ERR_* code */
    int (*pf_postprocess)( picture_t *p_pic,
                           QT_STORE_T *p_QP_store,
                           unsigned int i_QP_stride,
                           unsigned int i_mode );
} postprocessing_t;

#endif /* VLC_FFMPEG_POSTPROCESSING_H */
modules/codec/ffmpeg/postprocessing/postprocessing_c.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_c.c: Post Processing plugin in C
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_c.c,v 1.2 2002/12/06 16:34:05 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <vlc/vlc.h>
/* only use uint8_t, uint32_t .... */
#include "postprocessing.h"
#include "postprocessing_common.h"
/*****************************************************************************
*
* Internal functions common to pp_deblock_V and pp_deblock_H
*
*****************************************************************************/
/****************************************************************************
* pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
****************************************************************************
* Use constant PP_THR1 and PP_THR2 ( PP_2xTHR1 )
*
* Called for each pixel on a block boundary when deblocking,
* so need to be fast ...
*
****************************************************************************/
/*
 * Decide between DC mode and default mode for one 10-pixel line.
 *
 * Counts the neighbouring pairs (p_v[i], p_v[i+1]), i = 0..8, whose
 * difference lies within [-PP_THR1, PP_THR1].
 * Returns 1 (DC mode) when at least PP_THR2 pairs qualify, 0 otherwise.
 */
static inline int pp_deblock_isDC_mode( uint8_t *p_v )
{
    unsigned int i_flat_pairs = 0;
    int i;

    for( i = 0; i < 9; i++ )
    {
        /* Adding PP_THR1 maps the accepted range onto [0, PP_2xTHR1];
         * a more negative difference wraps to a large value and fails. */
        if( ( ( p_v[i] - p_v[i + 1] + PP_THR1 ) & 0xffff ) <= PP_2xTHR1 )
        {
            i_flat_pairs++;
        }
    }

    return ( i_flat_pairs >= PP_THR2 ) ? 1 : 0;
}
/*
 * Check that the spread of p_v[1] .. p_v[8] is small enough for DC mode.
 *
 * Returns 1 when (max - min) over those eight pixels is below 2 * i_QP,
 * 0 otherwise.  p_v[0] and p_v[9] are not looked at.
 */
static inline int pp_deblock_isMinMaxOk( uint8_t *p_v, int i_QP )
{
    int i_lowest = p_v[1];
    int i_highest = p_v[1];
    int i;

    for( i = 2; i <= 8; i++ )
    {
        const int i_pix = p_v[i];

        if( i_pix > i_highest )
        {
            i_highest = i_pix;
        }
        if( i_pix < i_lowest )
        {
            i_lowest = i_pix;
        }
    }

    if( i_highest - i_lowest < 2 * i_QP )
    {
        return 1;
    }
    return 0;
}
/*
 * Default-mode deblocking of one 10-pixel line; only i_v[4] and i_v[5]
 * (the two pixels on either side of the block edge) can change.
 *
 * i_v       the ten pixels across the edge
 * i_stride  unused
 * i_QP      quantizer used for the activity threshold
 *
 * NOTE(review): the energies below are never divided by 8 (the threshold
 * is 8 * i_QP instead), yet the correction is still 5 * (..) / 8 as in the
 * formula "d = CLIP( 5(a3x0' - a3x0)//8, 0, (v4-v5)/2 )" -- confirm that
 * this scaling is intended.
 */
static inline void pp_deblock_DefaultMode( uint8_t i_v[10],
                                           int i_stride,
                                           int i_QP )
{
    int i_mid, i_mid_abs;     /* signed and absolute energy at the edge */
    int i_left_abs, i_right_abs;
    int i_least;
    int i_corr, i_limit;

    (void)i_stride;

    i_mid = 2 * ( i_v[3] - i_v[6] ) + 5 * ( i_v[5] - i_v[4] );
    i_mid_abs = ( i_mid < 0 ) ? -i_mid : i_mid;

    /* Nothing to do without activity, or with too much of it */
    if( i_mid_abs == 0 || i_mid_abs >= 8 * i_QP )
    {
        return;
    }

    /* Same measure on the left and right of the edge */
    i_left_abs = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
    if( i_left_abs < 0 )
    {
        i_left_abs = -i_left_abs;
    }
    i_right_abs = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
    if( i_right_abs < 0 )
    {
        i_right_abs = -i_right_abs;
    }

    i_least = PP_MIN3( i_mid_abs, i_left_abs, i_right_abs );
    i_corr = 5 * ( i_mid_abs - i_least ) / 8;    /* never negative */
    i_limit = ( i_v[4] - i_v[5] ) / 2;

    /* The correction is applied only when its sign (opposite to the sign
     * of i_mid) matches the sign of i_limit, and is clipped to i_limit. */
    if( i_limit < 0 )
    {
        if( i_mid < 0 )
        {
            return;
        }
        i_corr = -i_corr;
        if( i_corr < i_limit )
        {
            i_corr = i_limit;
        }
    }
    else
    {
        if( i_mid > 0 )
        {
            return;
        }
        if( i_corr > i_limit )
        {
            i_corr = i_limit;
        }
    }

    i_v[4] -= i_corr;
    i_v[5] += i_corr;
}
/*
 * DC-mode deblocking of one 10-pixel line: p_v[1] .. p_v[8] are replaced
 * by a 9-tap low-pass (weights 1 1 2 2 4 2 2 1 1, sum 16) of the original
 * pixels.  Samples needed beyond either end of p_v[1..8] are padded with
 * one border value per side: p_v[0] (resp. p_v[9]) when it differs from
 * its neighbour by less than i_QP, the neighbour itself otherwise.
 */
static inline void pp_deblock_DCMode( uint8_t *p_v, /* = int i_v[10] */
                                      int i_QP )
{
    static const int pi_taps[9] = { 1, 1, 2, 2, 4, 2, 2, 1, 1 };
    int pi_ext[16];     /* pi_ext[m + 3] is sample m, for m = -3 .. 12 */
    int i_head, i_tail;
    int m, n, k;

    i_head = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
    i_tail = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];

    /* Snapshot the padded line before any pixel is overwritten */
    for( m = -3; m <= 12; m++ )
    {
        if( m < 1 )
        {
            pi_ext[m + 3] = i_head;
        }
        else if( m > 8 )
        {
            pi_ext[m + 3] = i_tail;
        }
        else
        {
            pi_ext[m + 3] = p_v[m];
        }
    }

    for( n = 1; n <= 8; n++ )
    {
        int i_sum = 0;

        /* tap k weighs sample n - 4 + k, stored at pi_ext[n - 1 + k] */
        for( k = 0; k < 9; k++ )
        {
            i_sum += pi_taps[k] * pi_ext[n - 1 + k];
        }
        p_v[n] = i_sum >> 4;
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ---------- filter Vertical lines so follow horizontal edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Deblock across the horizontal block edges of a plane (vertical pass).
 *
 * At every 8th row (starting at row 8, while 4 rows remain below) and for
 * each column, the 10 pixels straddling the edge are copied into a
 * contiguous buffer, filtered in DC or default mode, and pixels 1..8 are
 * written back.
 *
 * b_chroma selects the shift used to index p_QP_store: 5 when set, 4
 * otherwise.
 * NOTE(review): pp_postprocess passes chroma planes of half the luma size
 * here; confirm that a shift of 5 (rather than 3) addresses the intended
 * QP entry.
 */
void E_( pp_deblock_V )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_QP_scale = b_chroma ? 5 : 4;
    int i_edge, i_col, i;

    for( i_edge = 8; i_edge < i_height - 4; i_edge += 8 )
    {
        /* Row of the first of the ten pixels: 5 rows above the edge */
        uint8_t *p_top = p_plane + ( i_edge - 5 ) * i_stride;
        QT_STORE_T *p_QP_row = p_QP_store
                               + ( i_edge >> i_QP_scale ) * i_QP_stride;

        for( i_col = 0; i_col < i_width; i_col++ )
        {
            uint8_t *p_src = p_top + i_col;
            uint8_t pi_line[10];
            /* XXX QP is for v5 */
            int i_QP = p_QP_row[i_col >> i_QP_scale];

            /* Gather the column so the helpers can ignore the stride */
            for( i = 0; i < 10; i++ )
            {
                pi_line[i] = p_src[i * i_stride];
            }

            if( !pp_deblock_isDC_mode( pi_line ) )
            {
                pp_deblock_DefaultMode( pi_line, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( pi_line, i_QP ) )
            {
                pp_deblock_DCMode( pi_line, i_QP );
            }

            /* Scatter back; only pixels 1-8 can have been modified */
            for( i = 1; i < 9; i++ )
            {
                p_src[i * i_stride] = pi_line[i];
            }
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* --------- filter Horizontal lines so follow vertical edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Deblock across the vertical block edges of a plane (horizontal pass).
 *
 * On every row, at every 8th column (starting at column 8, while 4 columns
 * remain to the right), the 10 pixels starting 5 before the edge are
 * filtered in place in DC or default mode.
 *
 * b_chroma selects the shift used to index p_QP_store: 5 when set, 4
 * otherwise.
 *
 * The "- 5" offset is applied after adding x, so that no pointer in front
 * of the plane start is ever formed (the previous code computed
 * p_plane + y * i_stride - 5, which lies before the plane for y == 0).
 */
void E_( pp_deblock_H )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_QP_scale = b_chroma ? 5 : 4;
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        uint8_t *p_row = p_plane + y * i_stride;

        for( x = 8; x < i_width - 4; x += 8 )
        {
            /* p_v points 5 pixels before the block boundary */
            uint8_t *p_v = p_row + x - 5;
            /* XXX QP is for v5 */
            int i_QP = p_QP_store[( y >> i_QP_scale ) * i_QP_stride
                                  + ( x >> i_QP_scale )];

            if( pp_deblock_isDC_mode( p_v ) )
            {
                if( pp_deblock_isMinMaxOk( p_v, i_QP ) )
                {
                    pp_deblock_DCMode( p_v, i_QP );
                }
            }
            else
            {
                pp_deblock_DefaultMode( p_v, i_stride, i_QP );
            }
        }
    }
}
/*****************************************************************************
*
* Internal functions common to pp_dering_Y and pp_dering_C
*
*****************************************************************************/
/*
 * Find the smallest and largest pixel of the 8x8 block at p_block.
 *
 * i_stride  distance in bytes between two consecutive rows
 * pi_min    receives the minimum
 * pi_max    receives the maximum
 */
static inline void pp_dering_MinMax( uint8_t *p_block,
                                     int i_stride,
                                     int *pi_min, int *pi_max )
{
    int i_lowest = 255;
    int i_highest = 0;
    int i_row, i_col;

    for( i_row = 0; i_row < 8; i_row++, p_block += i_stride )
    {
        for( i_col = 0; i_col < 8; i_col++ )
        {
            const int i_pix = p_block[i_col];

            if( i_pix < i_lowest )
            {
                i_lowest = i_pix;
            }
            if( i_pix > i_highest )
            {
                i_highest = i_pix;
            }
        }
    }

    *pi_min = i_lowest;
    *pi_max = i_highest;
}
/*
 * Build the binary index of a 10x10 pixel area, one 32-bit word per row.
 *
 * For each of the 10 rows, bit x (0..9) is first set when pixel x is above
 * i_thr, and the complement of those bits is copied into the upper 16
 * bits.  The stored word then keeps a bit only where it and both of its
 * horizontal neighbours are set, i.e. it marks the centre of three
 * consecutive pixels above the threshold (low half) or not above it
 * (high half).
 *
 * p_bin must have room for 10 words.
 */
static inline void pp_dering_BinIndex( uint8_t *p_block, int i_stride,
                                       int i_thr,
                                       uint32_t *p_bin )
{
    int i_row, i_col;

    for( i_row = 0; i_row < 10; i_row++ )
    {
        uint32_t i_mask = 0;

        for( i_col = 0; i_col < 10; i_col++ )
        {
            if( p_block[i_col] > i_thr )
            {
                i_mask |= 1 << i_col;
            }
        }

        /* Upper half: complemented bits, to detect three 0 as well */
        i_mask |= ( ~i_mask ) << 16;

        p_bin[i_row] = i_mask & ( i_mask >> 1 ) & ( i_mask << 1 );
        p_block += i_stride;
    }
}
/*
 * Adaptive smoothing of one 8x8 block, in place.
 *
 * p_block   top-left pixel of the block; the one-pixel border around the
 *           block is read as well
 * i_stride  distance in bytes between two consecutive rows
 * p_bin     the 10 binary-index words of the surrounding 10x10 area
 * i_QP      quantizer; a pixel is moved by at most i_QP >> 1
 *
 * A pixel is filtered when its flag is set in the three index rows that
 * cover it (either in the low or in the high half of the words).  Filtered
 * pixels get the 3x3 weighted mean
 *      1 2 1
 *      2 4 2   (+8) >> 4
 *      1 2 1
 * clamped to the original value +/- (i_QP >> 1).  All results are computed
 * from the unmodified block and written back at the end.
 */
static inline void pp_dering_Filter( uint8_t *p_block, int i_stride,
                                     uint32_t *p_bin,
                                     int i_QP )
{
    const int i_max_step = i_QP >> 1;
    int pi_out[8][8];
    uint8_t *p_line = p_block;
    int i_row, i_col;

    for( i_row = 0; i_row < 8; i_row++, p_line += i_stride )
    {
        const uint8_t *p_up = p_line - i_stride;
        const uint8_t *p_down = p_line + i_stride;
        uint32_t i_flags = p_bin[i_row] & p_bin[i_row + 1] & p_bin[i_row + 2];

        i_flags |= i_flags >> 16;   /* accept three 0 as well as three 1 */
        i_flags >>= 1;              /* index bit 1 is block column 0 */

        for( i_col = 0; i_col < 8; i_col++, i_flags >>= 1 )
        {
            const int i_pix = p_line[i_col];
            int i_smooth, i_diff;

            if( !( i_flags & 1 ) )
            {
                pi_out[i_row][i_col] = i_pix;
                continue;
            }

            i_smooth =     p_up[i_col - 1]   + 2 * p_up[i_col]
                     +     p_up[i_col + 1]
                     + 2 * p_line[i_col - 1] + 4 * i_pix
                     + 2 * p_line[i_col + 1]
                     +     p_down[i_col - 1] + 2 * p_down[i_col]
                     +     p_down[i_col + 1];
            i_smooth = ( i_smooth + 8 ) >> 4;

            /* Clamp the change to +/- i_max_step */
            i_diff = i_smooth - i_pix;
            if( i_diff > i_max_step )
            {
                pi_out[i_row][i_col] = i_pix + i_max_step;
            }
            else if( i_diff < -i_max_step )
            {
                pi_out[i_row][i_col] = i_pix - i_max_step;
            }
            else
            {
                pi_out[i_row][i_col] = i_smooth;
            }
        }
    }

    /* Write the filtered block back */
    p_line = p_block;
    for( i_row = 0; i_row < 8; i_row++, p_line += i_stride )
    {
        for( i_col = 0; i_col < 8; i_col++ )
        {
            p_line[i_col] = pi_out[i_row][i_col];
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ----------------- Dering filter on Y and C blocks ----------------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Deringing of a luminance plane, in place.
 *
 * The plane is walked in groups of four 8x8 blocks (16x16 pixels), starting
 * at (8,8) and stopping 8 pixels before the right and bottom borders.
 * For each group:
 *   1. min/max, range and threshold are computed per block, then the
 *      thresholds are rearranged using the block with the largest range;
 *   2. the binary index of the 10x10 area around each block is built;
 *   3. each block is smoothed with its own QP entry.
 *
 * Fixes over the previous version:
 *   - the per-block loops ran over k = 0..4 and k = 1..4 on 4-element
 *     arrays (out-of-bounds accesses, block 0 never rearranged); they now
 *     run over k = 0..3;
 *   - the largest range was taken from i_max[] instead of i_range[];
 *   - the block pointers advanced by 8 pixels while x advanced by 16, so
 *     pixels and QP entries went out of step; the pointers are now derived
 *     from x on every iteration.
 *
 * NOTE(review): when the dimensions are not multiples of 16, the last
 * group can reach past i_width / i_height and p_QP[1] /
 * p_QP[i_QP_stride + 1] can address entries beyond the picture -- confirm
 * that callers guarantee suitable padding.
 */
void E_( pp_dering_Y )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int x, y, k;
    int i_max[4], i_min[4], i_range[4];
    int i_thr[4];
    int i_max_range, i_kmax;
    uint32_t i_bin[4][10];
    uint8_t *p_block[4];
    QT_STORE_T *p_QP;

    /* We process 4 blocks/loop */
    for( y = 8; y < i_height - 8; y += 16 )
    {
        for( x = 8; x < i_width - 8; x += 16 )
        {
            /* +-+-+
               |0|1|
               +-+-+
               |2|3|
               +-+-+ */
            p_block[0] = p_plane + y * i_stride + x;
            p_block[1] = p_block[0] + 8;
            p_block[2] = p_block[0] + ( i_stride << 3 );
            p_block[3] = p_block[2] + 8;

            /* 1: Calculate threshold */
            /* Calculate max/min for each block */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_MinMax( p_block[k], i_stride,
                                  &i_min[k], &i_max[k] );
            }

            /* Calculate range, max_range and thr */
            i_max_range = 0;
            i_kmax = 0;
            for( k = 0; k < 4; k++ )
            {
                i_range[k] = i_max[k] - i_min[k];
                i_thr[k] = ( i_max[k] + i_min[k] + 1 ) / 2;
                if( i_max_range < i_range[k] )
                {
                    i_max_range = i_range[k];
                    i_kmax = k;
                }
            }

            /* Now rearrange thr */
            if( i_max_range > 64 )
            {
                for( k = 0; k < 4; k++ )
                {
                    if( i_range[k] < 16 )
                    {
                        i_thr[k] = 0;
                    }
                    else if( i_range[k] < 32 )
                    {
                        i_thr[k] = i_thr[i_kmax];
                    }
                }
            }
            else
            {
                for( k = 0; k < 4; k++ )
                {
                    if( i_range[k] < 16 )
                    {
                        i_thr[k] = 0;
                    }
                }
            }

            /* 2: Index acquisition 10x10 ! so " -i_stride - 1".
             * All four indexes are built before any block is filtered,
             * because the 10x10 areas overlap the neighbouring blocks. */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_BinIndex( p_block[k] - i_stride - 1, i_stride,
                                    i_thr[k], i_bin[k] );
            }

            /* 3: adaptive smoothing */
            /* since we begin at (8,8) QP can be different for each block */
            p_QP = &( p_QP_store[( y >> 4 ) * i_QP_stride + ( x >> 4 )] );

            pp_dering_Filter( p_block[0], i_stride, i_bin[0],
                              p_QP[0] );
            pp_dering_Filter( p_block[1], i_stride, i_bin[1],
                              p_QP[1] );
            pp_dering_Filter( p_block[2], i_stride, i_bin[2],
                              p_QP[i_QP_stride] );
            pp_dering_Filter( p_block[3], i_stride, i_bin[3],
                              p_QP[i_QP_stride + 1] );
        }
    }
}
/*
 * Deringing of a chrominance plane, in place, one 8x8 block at a time.
 *
 * Blocks start at (8,8) and stop 8 pixels before the right and bottom
 * borders.  For each block the threshold is (max + min + 1) / 2, the binary
 * index is taken on the surrounding 10x10 area, and the block is smoothed
 * with the QP entry at (y >> 5, x >> 5).
 * NOTE(review): pp_postprocess passes planes of half the luma size here;
 * confirm that a shift of 5 (rather than 3) addresses the intended QP
 * entry.
 */
void E_( pp_dering_C )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int i_row, i_col;

    for( i_row = 8; i_row < i_height - 8; i_row += 8 )
    {
        uint8_t *p_block = p_plane + i_row * i_stride + 8;
        QT_STORE_T *p_QP_row = p_QP_store + ( i_row >> 5 ) * i_QP_stride;

        for( i_col = 8; i_col < i_width - 8; i_col += 8, p_block += 8 )
        {
            uint32_t pi_index[10];
            int i_lowest, i_highest;

            /* 1: threshold halfway between the block extremes */
            pp_dering_MinMax( p_block, i_stride, &i_lowest, &i_highest );

            /* 2: index of the 10x10 area that contains the 8x8 block */
            pp_dering_BinIndex( p_block - i_stride - 1, i_stride,
                                ( i_highest + i_lowest + 1 ) / 2,
                                pi_index );

            /* 3: adaptive smoothing */
            pp_dering_Filter( p_block, i_stride, pi_index,
                              p_QP_row[i_col >> 5] );
        }
    }
}
modules/codec/ffmpeg/postprocessing/postprocessing_common.h
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_common.h
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_common.h,v 1.2 2002/08/08 22:28:22 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#if 0
#define PP_USE_3DNOW /* Nothing done yet */
#define PP_USE_MMX /* when only MMX is supported */
#define PP_USE_MMXEXT /* when MMXEXT is also supported, imply MMX */
#endif

/* Thresholds for deblocking, values given by ISO.
 * They are unsigned long long because the MMX code packs them into 64-bit
 * constants. */
#define PP_THR1 2ULL               /* max |v[i] - v[i+1]| of a "flat" pair */
#define PP_2xTHR1 ( 2 * PP_THR1 )  /* internal usage */
#define PP_THR2 6ULL               /* flat pairs needed to select DC mode */

/*
 * Some useful macros.
 * Every argument is parenthesised wherever it is used, so expressions such
 * as PP_MAX( a & b, c ) or PP_ABS( c ? x : y ) are evaluated as expected.
 * Arguments may still be evaluated more than once: do not pass expressions
 * with side effects.
 */
#define PP_MAX( a, b ) ( (a) > (b) ? (a) : (b) )
#define PP_MIN( a, b ) ( (a) < (b) ? (a) : (b) )
#define PP_ABS( x ) ( ( (x) < 0 ) ? (-(x)) : (x) )
#define PP_SGN( x ) ( ( (x) < 0 ) ? -1 : 1 )
#define PP_MIN3( a, b, c ) ( PP_MIN( (a), PP_MIN( (b), (c) ) ) )
#define PP_CLIP( x, a, b ) ( PP_MAX( (a), PP_MIN( (x), (b) ) ) )
/*
 * Filter entry points, wrapped in E_() and defined in postprocessing_c.c
 * (presumably E_() makes the symbol name specific to each module variant,
 * so the MMX files define their own -- confirm).
 *
 * The parameter lists are left empty, so calls are not type-checked.
 * postprocessing_c.c defines them as:
 *   pp_deblock_V / pp_deblock_H( uint8_t *p_plane, int i_width,
 *       int i_height, int i_stride, QT_STORE_T *p_QP_store,
 *       int i_QP_stride, int b_chroma )
 *   pp_dering_Y / pp_dering_C( uint8_t *p_plane, int i_width,
 *       int i_height, int i_stride, QT_STORE_T *p_QP_store,
 *       int i_QP_stride )
 */
void E_( pp_deblock_V )();
void E_( pp_deblock_H )();
void E_( pp_dering_Y )();
void E_( pp_dering_C )();
modules/codec/ffmpeg/postprocessing/postprocessing_mmx.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_mmx.c: Post Processing library in MMX
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_mmx.c,v 1.3 2002/12/18 14:17:10 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <vlc/vlc.h>
/* only use uint8_t, uint32_t .... */
#include "postprocessing.h"
#include "postprocessing_common.h"
/*****************************************************************************
*
* Internals functions common to pp_Deblock_V and pp_Deblock_H
*
*****************************************************************************/
/*****************************************************************************
* MMX stuff
*****************************************************************************/
/* XXX PP_THR1 needs to be defined as ULL */
/* Use same things as in idct but how it work ? */
/*
 * Declare a 64-bit constant that is referenced by name from inline
 * assembly text.  The __asm__ label fixes the symbol name to the C name.
 *
 * The compiler cannot see references made from inside an asm string, so
 * the constant is marked "used": that forces it to be emitted even though
 * no C code refers to it.  ("unused" would only silence the warning and
 * still allow the constant to be discarded.)  The macro keeps its
 * historical name.
 */
#define UNUSED_LONGLONG( foo ) \
    static const unsigned long long foo __asm__ (#foo) __attribute__((used))

/* Replicate one byte value (0..255) into the eight bytes of a 64-bit word */
#define MMX_BYTE_x8( b ) ( (unsigned long long)(b) * 0x0101010101010101ULL )

/* to calculate isDC_mode for mmx */
/* eight bytes of 127 - PP_THR1 */
UNUSED_LONGLONG( mmx_127_thr1 ) = MMX_BYTE_x8( 127ULL - PP_THR1 );

/* eight bytes of 127 - 2 * PP_THR1 - 1 */
UNUSED_LONGLONG( mmx_127_2xthr1_1 ) = MMX_BYTE_x8( 127ULL - PP_2xTHR1 - 1 );

/* four 16-bit words: -2, 5, -5, 2 (most significant word first) */
UNUSED_LONGLONG( mmx_m2_5_m5_2 ) = 0xfffe0005fffb0002ULL;
#if 0
/* NOTE(review): this whole group is compiled out in this file, presumably
 * because pminub / pmaxub / pshufw are not available on plain MMX -- confirm. */

/* Horizontal minimum: leaves the smallest of the 8 unsigned bytes of r in
 * the low byte of r.  t is a scratch register and is destroyed. */
#define MMXEXT_GET_PMIN( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pminub " #t ", " #r " \n"
/* Horizontal maximum: leaves the largest of the 8 unsigned bytes of r in
 * the low byte of r.  t is a scratch register and is destroyed. */
#define MMXEXT_GET_PMAX( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n"
/* Running per-byte min/max: loads 8 bytes from s into t, then folds them
 * into m (byte-wise minimum) and M (byte-wise maximum). */
#define MMXEXT_GET_LMINMAX( s, m, M, t ) \
"movq " #s ", " #t " \n" \
"pminub " #t ", " #m " \n" \
"pmaxub " #t ", " #M " \n"
/* MMX tip -- absolute difference |a-b| of unsigned bytes:
 *   d1 = a - b with unsigned saturation
 *   d2 = b - a with unsigned saturation
 *   |a-b| = d1 | d2   (one of the two is always 0)
 */
#endif
/****************************************************************************
* pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
****************************************************************************
* Use constant PP_THR1 and PP_THR2 ( PP_2xTHR1 )
*
* Called for for each pixel on a boundary block when doing deblocking
* so need to be fast ...
*
****************************************************************************/
static
inline
int
pp_deblock_isDC_mode
(
uint8_t
*
p_v
)
{
unsigned
int
i_eq_cnt
;
/* algo :
x = v[i] - v[i+1] without signed saturation
( XXX see if there is'nt problem, but can't be with signed
sat because pixel will be saturate :(
so x within [-128, 127] and we have to test if it fit in [-M, M]
we add 127-M with wrap around -> good value fit in [ 127-2*M, 127]
and if x >= 127 - 2 * M ie x > 127 -2*M - 1 value is good
*/
__asm__
__volatile__
(
" #* Do (v0-v1) to (v7-v8)
\n
"
"movq (%1), %%mm1 # load v0->v7
\n
"
"movq 1(%1), %%mm2 # load v1->v8
\n
"
"psubb %%mm2, %%mm1 # v[i]-v[i+1]
\n
"
"paddb mmx_127_thr1, %%mm1 # + 127-THR1 with wrap
\n
"
"pcmpgtb mmx_127_2xthr1_1, %%mm1 # > 127 -2*thr1 - 1
\n
"
"movq %%mm1, %%mm0 #
\n
"
"psrlw $8, %%mm1 #
\n
"
"paddb %%mm1, %%mm0 #
\n
"
" #
\n
"
"movq %%mm0, %%mm1 # Now sum to create eq_cnt
\n
"
"psrld $16, %%mm0 #
\n
"
"paddb %%mm0, %%mm1 #
\n
"
" #
\n
"
"movq %%mm1, %%mm0 #
\n
"
"psrlq $32, %%mm1 #
\n
"
"paddb %%mm1, %%mm0
\n
"
"movd %%mm0, %0 #
\n
"
"negl %0
\n
"
"andl $255, %0"
:
"=r"
(
i_eq_cnt
)
:
"r"
(
p_v
)
);
/* last test, hey, 9 don't fit in MMX */
if
((
(
p_v
[
8
]
-
p_v
[
9
]
+
PP_THR1
)
&
0xffff
)
<=
PP_2xTHR1
)
{
i_eq_cnt
++
;
}
#if 0
/* algo : if ( | v[i] -v[i+1] | <= PP_THR1 ) { i_eq_cnt++; } */
i_eq_cnt = 0;
for( i =0; i < 9; i++ )
{
if(( ( p_v[i] - p_v[i+1] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
{
i_eq_cnt++;
}
}
#endif
return
(
(
i_eq_cnt
>=
PP_THR2
)
?
1
:
0
);
}
/* pp_deblock_isMinMaxOk: returns 1 when the spread ( max - min ) of the
 * eight pixels p_v[1] .. p_v[8] is strictly below 2 * i_QP, 0 otherwise.
 * p_v[0] and p_v[9] are not looked at. */
static inline int pp_deblock_isMinMaxOk( uint8_t *p_v, int i_QP )
{
    const uint8_t *p_cur  = p_v + 1;
    const uint8_t *p_last = p_v + 8;
    int i_lo = *p_cur;
    int i_hi = *p_cur;

#if 0
__asm__ __volatile__ (
"movq 1(%1), %%mm0 # 8 bytes \n"
"movq %%mm0, %%mm1 \n"
MMXEXT_GET_PMIN( %%mm0, %%mm7 )
MMXEXT_GET_PMAX( %%mm1, %%mm7 )
"psubd %%mm0, %%mm1 # max - min \n"
"movd %%mm1, %0 \n"
"andl $255, %0" : "=r"(i_range) : "r"(p_v) );
#endif

    /* Walk p_v[2] .. p_v[8], widening the [ i_lo, i_hi ] interval */
    while( p_cur != p_last )
    {
        const int i_pix = *++p_cur;

        if( i_pix > i_hi )
        {
            i_hi = i_pix;
        }
        else if( i_pix < i_lo )
        {
            i_lo = i_pix;
        }
    }

    return ( i_hi - i_lo ) < 2 * i_QP;
}
/* pp_deblock_DefaultMode: default-mode deblocking of one 10-pixel line.
 *
 * i_v      the 10 pixels v0..v9 across the block edge; only i_v[4] and
 *          i_v[5] (the two pixels on either side of the edge) may change.
 * i_stride unused, kept for the callers.
 * i_QP     quantiser of the block; the line is left alone unless
 *          0 < |a3x0| < 8 * i_QP.
 *
 * NOTE(review): the second asm statement relies on mm6 / mm7 still holding
 * the values loaded by the first one, none of the MMX registers are
 * declared as clobbered and no emms is issued -- presumably acceptable
 * because only integer code sits in between; confirm. */
static inline void pp_deblock_DefaultMode( uint8_t i_v[10], int i_stride,
                                           int i_QP )
{
    int d, i_delta;
    int a3x0, a3x0_, a3x1, a3x2;
    int b_neg;

    /* d = CLIP( 5(a3x0' - a3x0)//8, 0, (v4-v5)/2 ).d( abs(a3x0) < QP ) */

    /* First calculate a3x0 = 2 * ( v3 - v6 ) + 5 * ( v5 - v4 ) */
    __asm__ __volatile__ (
        "pxor      %%mm7,         %%mm7  # mm7 = 0           \n"
        "movq      mmx_m2_5_m5_2, %%mm6  # mm6 =(2,-5,5,-2)  \n"
        "movd      3(%1),         %%mm0                      \n"
        "punpcklbw %%mm7,         %%mm0                      \n"
        "pmaddwd   %%mm6,         %%mm0                      \n"
        "movq      %%mm0,         %%mm1                      \n"
        "psrlq     $32,           %%mm1                      \n"
        "paddd     %%mm1,         %%mm0                      \n"
        "movd      %%mm0,         %0"
        : "=r"(a3x0)
        : "r"(i_v)
        /* i_v[] is read through the pointer: make the compiler finish
           any pending stores to it first */
        : "memory" );
#if 0
    a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 *( i_v[5] - i_v[4] );
#endif

    if( a3x0 < 0 )
    {
        b_neg = 1;
        a3x0  = -a3x0;
    }
    else
    {
        b_neg = 0;
    }
    /* XXX Now a3x0 is abs( a3x0 ) */

    if( ( a3x0 < 8 * i_QP ) && ( a3x0 != 0 ) ) /* |a3x0| < 8*i_QP */
    {
        /* calculate a3x1 and a3x2 */
        __asm__ __volatile__ (
            "                              # mm7 = 0                 \n"
            "                              # mm6 = ( 2, -5, 5, -2 )  \n"
            "movd      1(%2),  %%mm0                                 \n"
            "movd      5(%2),  %%mm2                                 \n"
            "punpcklbw %%mm7,  %%mm0                                 \n"
            "punpcklbw %%mm7,  %%mm2                                 \n"
            "pmaddwd   %%mm6,  %%mm0                                 \n"
            "pmaddwd   %%mm6,  %%mm2                                 \n"
            "movq      %%mm0,  %%mm1                                 \n"
            "psrlq     $32,    %%mm1                                 \n"
            "paddd     %%mm1,  %%mm0       # mm0 = a3x1              \n"
            "movd      %%mm0,  %0                                    \n"
            "movq      %%mm2,  %%mm1                                 \n"
            "psrlq     $32,    %%mm1                                 \n"
            "paddd     %%mm1,  %%mm2       # mm2 = a3x2              \n"
            "movd      %%mm2,  %1                                    \n"
            : "=r"(a3x1), "=r"(a3x2)
            : "r"(i_v)
            : "memory" );
#if 0
        a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
        a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
#endif

        if( a3x1 < 0 ) a3x1 = -a3x1; /* abs( a3x1 ) */
        if( a3x2 < 0 ) a3x2 = -a3x2; /* abs( a3x2 ) */

        a3x0_ = PP_MIN3( a3x0, a3x1, a3x2 );

        d = 5 * ( a3x0 - a3x0_ ) / 8; /* always >= 0 */

        i_delta = ( i_v[4] - i_v[5] ) / 2;

        /* clip into [0, i_delta] or [i_delta, 0]; nothing is done when the
           correction and the step across the edge have opposite signs */
        if( i_delta < 0 )
        {
            if( !b_neg ) /* since true d has sgn(d) = - sgn( a3x0 ) */
            {
                d = -d;
                if( d < i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
        else
        {
            if( b_neg )
            {
                if( d > i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
    }
}
/* pp_deblock_DCMode: DC-mode (strong) smoothing of one 10-pixel line.
 *
 * Replaces p_v[1] .. p_v[8] by a 9-tap low-pass of the line with weights
 * ( 1 1 2 2 4 2 2 1 1 ) / 16 (see the reference C code at the end of the
 * function).  Taps that fall before v1 use i_p0 and taps that fall after
 * v8 use i_p9, where i_p0 / i_p9 are the outer pixels v0 / v9 when they are
 * within i_QP of their neighbour, and v1 / v8 otherwise.
 * p_v[0] and p_v[9] are read but never written.
 *
 * NOTE(review): mm0-mm3 and mm5-mm7 are used without being declared as
 * clobbered and no emms is issued -- presumably handled by the callers;
 * confirm. */
static inline void pp_deblock_DCMode( uint8_t *p_v, /* 10 pixels v0..v9 */
                                      int i_QP )
{
    int i_p0, i_p9;

    i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
    i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];

    /* Register usage:
       mm0 = the 8 pixels v1..v8, unmodified
       mm1 = scratch, holds 4 words at a time:
             - for the first part of the 4 first pixels
                 (v1) -> (p0) -> ... ( word )
                 (v2)    (v1)
                 (v3)    (v2)
                 (v4)    (v3)
             - for the part common to the first and last pixels
                 (v2) -> (v3) -> ... ( word )
                 (v3)    (v4)
                 (v4)    (v5)
                 (v5)    (v6)
             - for the last part of the 4 last pixels
                 (v5) -> (v6) -> ... ( word )
                 (v6)    (v7)
                 (v7)    (v8)
                 (v8)    (p9)
       mm2 = accumulator for the 4 first new pixels
       mm3 = accumulator for the 4 last new pixels
       mm4 = unused
       mm5 = p0
       mm6 = p9 << 48
       mm7 = 0 */
    __asm__ __volatile__ (
        "pxor %%mm7, %%mm7 \n"
        "movq 1(%0), %%mm0 # get 8 pix \n"
        " # unpack into mm1 \n"
        "movq %%mm0, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 \n"
        " # get p_0 and i_p9 \n"
        "movd %1, %%mm5 \n"
        "movd %2, %%mm6 \n"
        "psllq $48, %%mm6 \n"
        " \n"
        "movq %%mm1, %%mm3 # p_v[5-8] = v[1-4] !! \n"
        "movq %%mm1, %%mm2 \n"
        "psllw $2, %%mm2 # p_v[1-4] = 4*v[1-4] \n"
        " \n"
        /* shift one word up and insert p0 at the bottom, four times */
        "psllq $16, %%mm1 \n"
        "por %%mm5, %%mm1 # mm1 =( p0, v1, v2 ,v3) \n"
        " \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm2 \n"
        " \n"
        "psllq $16, %%mm1 \n"
        "por %%mm5, %%mm1 # mm1 =( p0, p0, v1, v2) \n"
        " \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm2 \n"
        " \n"
        "psllq $16, %%mm1 \n"
        "por %%mm5, %%mm1 # mm1 =( p0, p0, p0, v1) \n"
        " \n"
        "paddw %%mm1, %%mm2 \n"
        " \n"
        "psllq $16, %%mm1 \n"
        "por %%mm5, %%mm1 # mm1 =( p0, p0, p0, p0) \n"
        " \n"
        "paddw %%mm1, %%mm2 \n"
        " # Now last part a little borring \n"
        " # last part for mm2, beginig for mm3 \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $8, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v2, v3, v4, v5 ) \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $16, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v3, v4, v5, v6 ) \n"
        "psllw $1, %%mm1 \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $24, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v4, v5, v6, v7) \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm3 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $32, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v5, v6, v7, v8) \n"
        "paddw %%mm1, %%mm2 \n"
        "psllw $2, %%mm1 \n"
        "paddw %%mm1, %%mm3 \n"
        " # Now last part for last 4 pix \n"
        " # \n"
        "movq %%mm0, %%mm1 \n"
        "punpckhbw %%mm7, %%mm1 # mm1 = ( v5, v6, v7, v8) \n"
        " \n"
        /* shift one word down and insert p9 at the top, four times */
        "psrlq $16, %%mm1 \n"
        "por %%mm6, %%mm1 # mm1 =( v6, v7, v8, p9 ) \n"
        " \n"
        "paddw %%mm1, %%mm3 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "psrlq $16, %%mm1 \n"
        "por %%mm6, %%mm1 # mm1 =( v7, v8, p9, p9) \n"
        " \n"
        "paddw %%mm1, %%mm3 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "psrlq $16, %%mm1 \n"
        "por %%mm6, %%mm1 # mm1 =( v8, p9, p9, p9 ) \n"
        " \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "psrlq $16, %%mm1 \n"
        "por %%mm6, %%mm1 # mm1 =( p9, p9, p9, p9 ) \n"
        " \n"
        "paddw %%mm1, %%mm3 \n"
        /* divide both accumulators by 16, pack to bytes, store v1..v8 */
        "psrlw $4, %%mm2 \n"
        "psrlw $4, %%mm3 \n"
        "packuswb %%mm3, %%mm2 \n"
        "movq %%mm2, 1(%0) \n"
        : : "r"(p_v), "r"(i_p0), "r"(i_p9) : "memory" );

#if 0
    /* Reference C version of the asm above */
    for( i = 1; i < 9; i++ )
    {
        v[i] = p_v[i]; /* save 8 pix that will be modified */
    }
    p_v[1] = ( 6 * i_p0 + 4 * v[1]
    + 2 *( v[2] + v[3]) + v[4] + v[5]) >> 4;
    p_v[2] = ( 4 * i_p0 + 2 * v[1] + 4 * v[2]
    + 2 *( v[3] + v[4]) + v[5] + v[6]) >> 4;
    p_v[3] = ( 2 * i_p0 + 2 * (v[1] + v[2]) + 4 * v[3]
    + 2 *( v[4] + v[5]) + v[6] + v[7]) >> 4;
    p_v[4] = ( i_p0 + v[1] + 2 * (v[2] + v[3]) + 4 * v[4]
    + 2 *( v[5] + v[6]) + v[7] + v[8]) >> 4;
    p_v[5] = ( v[1] + v[2] + 2 * (v[3] + v[4]) + 4 * v[5]
    + 2 *( v[6] + v[7]) + v[8] + i_p9) >> 4;
    p_v[6] = ( v[2] + v[3] + 2 * (v[4] + v[5]) + 4 * v[6]
    + 2 *( v[7] + v[8]) + 2 * i_p9) >> 4;
    p_v[7] = ( v[3] + v[4] + 2 * (v[5] + v[6]) + 4 * v[7]
    + 2 * v[8] + 4 * i_p9) >> 4;
    p_v[8] = ( v[4] + v[5] + 2 * (v[6] + v[7]) + 4 * v[8]
    + 6 * i_p9) >> 4;
#endif
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ---------- filter Vertical lines so follow horizontal edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* pp_deblock_V: deblock the horizontal block edges of a plane.
 *
 * For every 8th row starting at row 8 (while row < i_height - 4) and for
 * every column, the 10 pixels from 5 rows above the edge to 4 rows below it
 * are gathered into a small contiguous buffer, filtered in DC or Default
 * mode, and pixels 1..8 are scattered back.
 *
 * The quantiser is read from p_QP_store at ( row >> s, column >> s ) with
 * s = 5 when b_chroma is set and 4 otherwise. */
void E_( pp_deblock_V )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_shift = b_chroma ? 5 : 4;
    uint8_t   column[10];
    int       i_row, i_col, i_tap;

    for( i_row = 8; i_row < i_height - 4; i_row += 8 )
    {
        /* first of the 10 rows, and QP line used for this edge */
        uint8_t  *p_top     = p_plane + ( i_row - 5 ) * i_stride;
        const int i_qp_line = ( i_row >> i_shift ) * i_QP_stride;

        for( i_col = 0; i_col < i_width; i_col++ )
        {
            uint8_t *p_pix = p_top + i_col;
            int      i_qp;

            /* gather the vertical line so the helpers can ignore i_stride */
            for( i_tap = 0; i_tap < 10; i_tap++ )
            {
                column[i_tap] = p_pix[i_tap * i_stride];
            }

            /* XXX QP is for v5 */
            i_qp = p_QP_store[i_qp_line + ( i_col >> i_shift )];

            if( !pp_deblock_isDC_mode( column ) )
            {
                pp_deblock_DefaultMode( column, i_stride, i_qp );
            }
            else if( pp_deblock_isMinMaxOk( column, i_qp ) )
            {
                pp_deblock_DCMode( column, i_qp );
            }

            /* scatter back; only pixels 1-8 can have been modified */
            for( i_tap = 1; i_tap < 9; i_tap++ )
            {
                p_pix[i_tap * i_stride] = column[i_tap];
            }
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* --------- filter Horizontal lines so follow vertical edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* pp_deblock_H: deblock the vertical block edges of a plane.
 *
 * On every row, for every 8th column starting at column 8 (while
 * column < i_width - 4), the 10 pixels starting 5 columns before the edge
 * are filtered in place, in DC or Default mode.
 *
 * The quantiser is read from p_QP_store at ( row >> s, column >> s ) with
 * s = 5 when b_chroma is set and 4 otherwise. */
void E_( pp_deblock_H )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    const int i_shift = b_chroma ? 5 : 4;
    int       i_row, i_col;

    for( i_row = 0; i_row < i_height; i_row++ )
    {
        const int i_line    = i_row * i_stride;
        const int i_qp_line = ( i_row >> i_shift ) * i_QP_stride;

        for( i_col = 8; i_col < i_width - 4; i_col += 8 )
        {
            /* 5 pixels before the block boundary */
            uint8_t  *p_edge = p_plane + ( i_line + i_col - 5 );
            /* XXX QP is for v5 */
            const int i_qp   = p_QP_store[i_qp_line + ( i_col >> i_shift )];

            if( !pp_deblock_isDC_mode( p_edge ) )
            {
                pp_deblock_DefaultMode( p_edge, i_stride, i_qp );
            }
            else if( pp_deblock_isMinMaxOk( p_edge, i_qp ) )
            {
                pp_deblock_DCMode( p_edge, i_qp );
            }
        }
    }
}
/*****************************************************************************
*
* Internals functions common to pp_Dering_Y pp_Dering_C
*
*****************************************************************************/
/* pp_dering_MinMax: smallest and largest pixel of an 8x8 block.
 *
 * p_block  top-left pixel of the block, rows are i_stride bytes apart
 * pi_min   receives the minimum
 * pi_max   receives the maximum */
static inline void pp_dering_MinMax( uint8_t *p_block, int i_stride,
                                     int *pi_min, int *pi_max )
{
    int i_lo = 255;
    int i_hi = 0;
    int i_rows_left = 8;

#if 0
/* First we will extract min/max for each pix on vertical line
and next extract global min/max */
__asm__ __volatile__(
"leal (%2,%3), %%eax \n"
"movq (%2), %%mm0 #load line \n"
"movq %%mm0, %%mm1 \n"
MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
MMXEXT_GET_LMINMAX( (%%eax, %3), %%mm0, %%mm1, %%mm7 )
MMXEXT_GET_LMINMAX( (%%eax, %3,2), %%mm0, %%mm1, %%mm7 )
MMXEXT_GET_LMINMAX( (%2, %3, 4), %%mm0, %%mm1, %%mm7 )
"leal (%%eax,%3,4), %%eax \n"
MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
MMXEXT_GET_LMINMAX( (%%eax, %3), %%mm0, %%mm1, %%mm7 )
MMXEXT_GET_LMINMAX( (%%eax, %3,2), %%mm0, %%mm1, %%mm7 )
MMXEXT_GET_PMIN( %%mm0, %%mm7 )
MMXEXT_GET_PMAX( %%mm1, %%mm7 )
"movd %%mm0, %%eax \n"
"andl $255, %%eax \n"
"movl %%eax, (%0) \n"
"movd %%mm1, %%eax \n"
"andl $255, %%eax \n"
"movl %%eax, (%1) \n"
: : "r"(pi_min), "r"(pi_max), "r"(p_block), "r"(i_stride) : "%eax", "memory" );
#endif

    while( i_rows_left-- > 0 )
    {
        const uint8_t *p_pix = p_block;
        const uint8_t *p_end = p_block + 8;

        for( ; p_pix != p_end; p_pix++ )
        {
            if( *p_pix < i_lo )
            {
                i_lo = *p_pix;
            }
            if( *p_pix > i_hi )
            {
                i_hi = *p_pix;
            }
        }
        p_block += i_stride;
    }

    *pi_min = i_lo;
    *pi_max = i_hi;
}
/* pp_dering_BinIndex: build the "flat neighbourhood" bit rows of a 10x10
 * pixel area.
 *
 * For each of the 10 rows, bit x (x = 0..9) of a mask is set when pixel x
 * is above i_thr; the complement of the mask is copied 16 bits higher so
 * that runs of pixels at or below the threshold can be detected as well.
 * The word stored in p_bin keeps a bit only where it and both of its
 * horizontal neighbours are set, i.e. it marks the centre of three
 * consecutive equal decisions.
 *
 * p_block  top-left pixel of the 10x10 area, rows i_stride bytes apart
 * p_bin    receives 10 words, one per row */
static inline void pp_dering_BinIndex( uint8_t *p_block, int i_stride,
                                       int i_thr, uint32_t *p_bin )
{
    int i_row, i_col;

    for( i_row = 0; i_row < 10; i_row++, p_block += i_stride )
    {
        uint32_t i_mask = 0;
        uint32_t i_bit  = 1;

        for( i_col = 0; i_col < 10; i_col++, i_bit <<= 1 )
        {
            if( p_block[i_col] > i_thr )
            {
                i_mask |= i_bit;
            }
        }

        /* upper half: same test for the "not above threshold" pixels */
        i_mask |= ( ~i_mask ) << 16;

        p_bin[i_row] = i_mask & ( i_mask >> 1 ) & ( i_mask << 1 );
    }
}
/* pp_dering_Filter: adaptive smoothing of one 8x8 block.
 *
 * A pixel is smoothed when the bit rows of its own line and of the lines
 * above and below (p_bin[y], p_bin[y+1], p_bin[y+2], either half of the
 * word) all flag it; bit 1 corresponds to block column 0 because p_bin
 * describes the 10-wide area around the block.  Smoothing uses the kernel
 *        1 2 1
 *        2 4 2   / 16, rounded,
 *        1 2 1
 * and the change is limited to +/- ( i_QP >> 1 ).  Results are buffered so
 * that every pixel is computed from unfiltered neighbours of this block.
 *
 * p_block  top-left pixel of the block; the one-pixel border around it is
 *          read as well
 * p_bin    the 10 words produced by pp_dering_BinIndex for this block */
static inline void pp_dering_Filter( uint8_t *p_block, int i_stride,
                                     uint32_t *p_bin, int i_QP )
{
    const int i_limit = i_QP >> 1;
    int       filtered[8][8];
    uint8_t  *p_row;
    int       i_row, i_col;

    for( i_row = 0, p_row = p_block; i_row < 8; i_row++, p_row += i_stride )
    {
        const uint8_t *p_up = p_row - i_stride;
        const uint8_t *p_dn = p_row + i_stride;
        uint32_t i_mask = p_bin[i_row] & p_bin[i_row + 1] & p_bin[i_row + 2];

        i_mask |= i_mask >> 16; /* accept either half of the word */

        for( i_col = 0; i_col < 8; i_col++ )
        {
            const int i_pix = p_row[i_col];
            int       i_out = i_pix;

            if( ( i_mask >> ( i_col + 1 ) ) & 1 )
            {
                int i_avg =     p_up[i_col - 1]  + 2 * p_up[i_col]
                          +     p_up[i_col + 1]
                          + 2 * p_row[i_col - 1] + 4 * i_pix
                          + 2 * p_row[i_col + 1]
                          +     p_dn[i_col - 1]  + 2 * p_dn[i_col]
                          +     p_dn[i_col + 1];

                i_avg = ( i_avg + 8 ) >> 4;

                /* keep the result within i_limit of the original pixel */
                if( i_avg - i_pix > i_limit )
                {
                    i_out = i_pix + i_limit;
                }
                else if( i_avg - i_pix < -i_limit )
                {
                    i_out = i_pix - i_limit;
                }
                else
                {
                    i_out = i_avg;
                }
            }

            filtered[i_row][i_col] = i_out;
        }
    }

    /* write the block back once every pixel has been computed */
    for( i_row = 0, p_row = p_block; i_row < 8; i_row++, p_row += i_stride )
    {
        for( i_col = 0; i_col < 8; i_col++ )
        {
            p_row[i_col] = filtered[i_row][i_col];
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ----------------- Dering filter on Y and C blocks ----------------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/* pp_dering_Y: dering filter for a luma plane.
 *
 * The plane is walked in groups of 2x2 blocks of 8x8 pixels, starting at
 * (8,8) and advancing by 16 in both directions:
 *        +-+-+
 *        |0|1|
 *        +-+-+
 *        |2|3|
 *        +-+-+
 * For every block a threshold is derived from its min/max, the thresholds
 * of low-contrast blocks are adjusted using the other blocks of the group,
 * then the 10x10 bit index is built and the adaptive smoothing applied.
 *
 * Only blocks that lie entirely inside [8, i_width - 8) x [8, i_height - 8)
 * are processed (the same coverage as pp_dering_C), because the filter
 * reads one pixel beyond the block on every side.
 *
 * p_QP_store holds one quantiser per 16x16 area, i_QP_stride entries per
 * line; since the group starts at an offset of 8, each of its four blocks
 * uses a different entry. */
void E_( pp_dering_Y )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int x, y, k;
    int i_max[4], i_min[4], i_range[4];
    int i_thr[4];
    int b_use[4];       /* is block k inside the filtered area ? */
    int i_qp_off[4];    /* QP entry of block k relative to block 0 */
    int i_max_range, i_kmax;
    uint32_t i_bin[4][10];
    uint8_t *p_block[4];
    QT_STORE_T *p_QP;

    i_qp_off[0] = 0;
    i_qp_off[1] = 1;
    i_qp_off[2] = i_QP_stride;
    i_qp_off[3] = i_QP_stride + 1;

    /* We process 4 blocks/loop */
    for( y = 8; y < i_height - 8; y += 16 )
    {
        for( x = 8; x < i_width - 8; x += 16 )
        {
            /* Derive the block pointers from (x,y) so that they always
               advance by 16, in step with x and with the QP lookup */
            p_block[0] = p_plane + y * i_stride + x;
            p_block[1] = p_block[0] + 8;
            p_block[2] = p_block[0] + ( i_stride << 3 );
            p_block[3] = p_block[2] + 8;

            b_use[0] = 1;
            b_use[1] = ( x + 8 < i_width  - 8 );
            b_use[2] = ( y + 8 < i_height - 8 );
            b_use[3] = b_use[1] && b_use[2];

            /* 1: Calculate threshold */
            /* Calculate max/min, range and thr for each block */
            i_max_range = 0;
            i_kmax = 0;
            for( k = 0; k < 4; k++ )
            {
                if( !b_use[k] )
                {
                    continue;
                }
                pp_dering_MinMax( p_block[k], i_stride,
                                  &i_min[k], &i_max[k] );

                i_range[k] = i_max[k] - i_min[k];
                i_thr[k]   = ( i_max[k] + i_min[k] + 1 ) / 2;

                /* NOTE(review): this compares the block maximum, not
                   i_range[k], although the variable is called
                   i_max_range -- kept as is, confirm the intent. */
                if( i_max_range < i_max[k] )
                {
                    i_max_range = i_max[k];
                    i_kmax = k;
                }
            }

            /* Now rearrange thr */
            for( k = 0; k < 4; k++ )
            {
                if( !b_use[k] )
                {
                    continue;
                }
                if( i_range[k] < 16 )
                {
                    i_thr[k] = 0;
                }
                else if( ( i_max_range > 64 ) && ( i_range[k] < 32 ) )
                {
                    i_thr[k] = i_thr[i_kmax];
                }
            }

            /* 2: Index acquisition 10x10 ! so " -i_stride - 1" */
            for( k = 0; k < 4; k++ )
            {
                if( b_use[k] )
                {
                    pp_dering_BinIndex( p_block[k] - i_stride - 1, i_stride,
                                        i_thr[k], i_bin[k] );
                }
            }

            /* 3: adaptive smoothing */
            /* since we begin at (8,8) QP can be different for each block */
            p_QP = &( p_QP_store[( y >> 4 ) * i_QP_stride + ( x >> 4 )] );

            for( k = 0; k < 4; k++ )
            {
                if( b_use[k] )
                {
                    pp_dering_Filter( p_block[k], i_stride,
                                      i_bin[k], p_QP[i_qp_off[k]] );
                }
            }
        }
    }
}
/* pp_dering_C: dering filter for a chroma plane.
 *
 * Every 8x8 block whose top-left corner lies in
 * [8, i_width - 8) x [8, i_height - 8) gets its own threshold
 * ( max + min + 1 ) / 2, its 10x10 bit index and the adaptive smoothing.
 * The quantiser is read from p_QP_store at ( y >> 5, x >> 5 ). */
void E_( pp_dering_C )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    uint32_t bin_rows[10];
    int      i_row, i_col;

    for( i_row = 8; i_row < i_height - 8; i_row += 8 )
    {
        uint8_t  *p_line    = p_plane + i_row * i_stride;
        const int i_qp_line = ( i_row >> 5 ) * i_QP_stride;

        for( i_col = 8; i_col < i_width - 8; i_col += 8 )
        {
            uint8_t *p_blk = p_line + i_col;
            int      i_lo, i_hi;

            /* 1: threshold halfway between the block's extremes */
            pp_dering_MinMax( p_blk, i_stride, &i_lo, &i_hi );

            /* 2: index acquisition on the 10x10 area around the block */
            pp_dering_BinIndex( p_blk - i_stride - 1, i_stride,
                                ( i_hi + i_lo + 1 ) / 2, bin_rows );

            /* 3: adaptive smoothing */
            pp_dering_Filter( p_blk, i_stride, bin_rows,
                              p_QP_store[i_qp_line + ( i_col >> 5 )] );
        }
    }
}
modules/codec/ffmpeg/postprocessing/postprocessing_mmxext.c
deleted
100644 → 0
View file @
b15eda88
/*****************************************************************************
* postprocessing_mmxext.c: Post Processing plugin MMXEXT
*****************************************************************************
* Copyright (C) 2001 VideoLAN
* $Id: postprocessing_mmxext.c,v 1.5 2002/12/18 14:17:10 sam Exp $
*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <vlc/vlc.h>
/* only use uint8_t, uint32_t .... */
#include "postprocessing.h"
#include "postprocessing_common.h"
/*****************************************************************************
*
* Internals functions common to pp_Deblock_V and pp_Deblock_H
*
*****************************************************************************/
/*****************************************************************************
* MMX stuff
*****************************************************************************/
/* XXX: PP_THR1 (defined outside this file) is expected to carry an ULL
 * suffix so that it can take part in the 64-bit constants below. */
/* UNUSED_LONGLONG( foo ) declares a file-local 64-bit constant and forces its
 * assembler-level symbol name to "foo" through the __asm__ label, which lets
 * the inline assembly in this file address the constant by name.  The
 * "unused" attribute is there because the C code itself never names it.
 * (The original author notes the trick was borrowed from the idct code.) */
#define UNUSED_LONGLONG( foo ) \
    static const unsigned long long foo __asm__ (#foo) __attribute__((unused))
/* Eight copies of the byte PP_THR1.
 * Each term is widened to 64 bits before shifting so that the constant no
 * longer depends on PP_THR1 being spelled with an ULL suffix (shifting a
 * plain int by 32 or more is undefined).
 * NOTE(review): no asm visible in this part of the file references
 * mmx_thr1 -- presumably kept for an isDC_mode variant; confirm. */
UNUSED_LONGLONG( mmx_thr1 ) =
      ( (unsigned long long)( PP_THR1 ) << 56 )
    | ( (unsigned long long)( PP_THR1 ) << 48 )
    | ( (unsigned long long)( PP_THR1 ) << 40 )
    | ( (unsigned long long)( PP_THR1 ) << 32 )
    | ( (unsigned long long)( PP_THR1 ) << 24 )
    | ( (unsigned long long)( PP_THR1 ) << 16 )
    | ( (unsigned long long)( PP_THR1 ) <<  8 )
    |   (unsigned long long)( PP_THR1 );
/* Eight copies of the byte ( 127 - PP_THR1 ): the bias that
 * pp_deblock_isDC_mode adds (paddb) to every v[i] - v[i+1] difference
 * before its signed byte compare.  Listed from the lowest byte upwards. */
UNUSED_LONGLONG( mmx_127_thr1 ) =
      ( ( 127ULL - PP_THR1 )       )
    | ( ( 127ULL - PP_THR1 ) <<  8 )
    | ( ( 127ULL - PP_THR1 ) << 16 )
    | ( ( 127ULL - PP_THR1 ) << 24 )
    | ( ( 127ULL - PP_THR1 ) << 32 )
    | ( ( 127ULL - PP_THR1 ) << 40 )
    | ( ( 127ULL - PP_THR1 ) << 48 )
    | ( ( 127ULL - PP_THR1 ) << 56 );
/* Eight copies of the byte ( 127 - PP_2xTHR1 - 1 ): the value every biased
 * difference is compared against (pcmpgtb) in pp_deblock_isDC_mode.
 * Listed from the lowest byte upwards. */
UNUSED_LONGLONG( mmx_127_2xthr1_1 ) =
      ( ( 127ULL - PP_2xTHR1 - 1 )       )
    | ( ( 127ULL - PP_2xTHR1 - 1 ) <<  8 )
    | ( ( 127ULL - PP_2xTHR1 - 1 ) << 16 )
    | ( ( 127ULL - PP_2xTHR1 - 1 ) << 24 )
    | ( ( 127ULL - PP_2xTHR1 - 1 ) << 32 )
    | ( ( 127ULL - PP_2xTHR1 - 1 ) << 40 )
    | ( ( 127ULL - PP_2xTHR1 - 1 ) << 48 )
    | ( ( 127ULL - PP_2xTHR1 - 1 ) << 56 );
/* Four signed 16-bit multipliers used with pmaddwd in
 * pp_deblock_DefaultMode; from the lowest word upwards: ( 2, -5, 5, -2 ). */
UNUSED_LONGLONG( mmx_m2_5_m5_2 ) =
      ( 0xfffeULL << 48 )   /* word 3 : -2 */
    | ( 0x0005ULL << 32 )   /* word 2 :  5 */
    | ( 0xfffbULL << 16 )   /* word 1 : -5 */
    |   0x0002ULL;          /* word 0 :  2 */
/* Horizontal minimum: leaves the smallest of the 8 unsigned bytes of r in
 * the low byte of r.  t is a scratch register and is destroyed. */
#define MMXEXT_GET_PMIN( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n" \
"pminub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pminub " #t ", " #r " \n"
/* Horizontal maximum: leaves the largest of the 8 unsigned bytes of r in
 * the low byte of r.  t is a scratch register and is destroyed. */
#define MMXEXT_GET_PMAX( r, t ) \
"movq " #r ", " #t " \n" \
"psrlq $8, " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xf5, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n" \
"pshufw $0xfe, " #r ", " #t " \n" \
"pmaxub " #t ", " #r " \n"
/* Running per-byte min/max: loads 8 bytes from s into t, then folds them
 * into m (byte-wise minimum) and M (byte-wise maximum). */
#define MMXEXT_GET_LMINMAX( s, m, M, t ) \
"movq " #s ", " #t " \n" \
"pminub " #t ", " #m " \n" \
"pmaxub " #t ", " #M " \n"
/* MMX tip -- absolute difference |a-b| of unsigned bytes:
 *   d1 = a - b with unsigned saturation
 *   d2 = b - a with unsigned saturation
 *   |a-b| = d1 | d2   (one of the two is always 0)
 */
/****************************************************************************
 * pp_deblock_isDC_mode: choose between DC mode and Default mode
 ****************************************************************************
 * Counts how many of the 9 neighbouring pairs ( v[i], v[i+1] ), i = 0..8,
 * lie within PP_THR1 of each other and returns 1 when that count reaches
 * PP_THR2, 0 otherwise.  PP_2xTHR1 is presumably 2 * PP_THR1 (defined
 * outside this file).
 *
 * p_v must point to 10 readable pixels.  This runs for every line that
 * crosses a block boundary, so it has to be fast.
 *
 * NOTE(review): the asm uses mm0-mm2 without declaring them and issues no
 * emms; presumably the callers of the filter take care of that -- confirm.
 ****************************************************************************/
static inline int pp_deblock_isDC_mode( uint8_t *p_v )
{
    unsigned int i_eq_cnt;

    /* Pairs 0..7, eight at a time:
         x = v[i] - v[i+1] as a wrapping signed byte, so x is in [-128, 127]
         and has to be tested against [-M, M] with M = PP_THR1.
         Adding 127 - M (with wrap-around) maps the good values onto
         [127 - 2*M, 127], i.e. x + 127 - M > 127 - 2*M - 1.
       XXX a difference that wraps (e.g. 255 vs 0) is seen as small; this
       is a known limitation of the byte arithmetic.

       pcmpgtb leaves 0xff in every good byte and psadbw against zero adds
       the bytes up as unsigned values, giving 255 * n for n good pairs.
       Negating yields -255 * n, which is congruent to n modulo 256, so the
       final "andl $255" is what turns the sum into the count n (0..8).
       Without that mask the result would be 2^32 - 255 * n and any n >= 1
       would pass the PP_THR2 test. */
    __asm__ __volatile__ (
        "                                 #* Do (v0-v1) to (v7-v8)      \n"
        "movq    (%1),             %%mm1  # load v0->v7                 \n"
        "pxor    %%mm0,            %%mm0  # mm0 = 0                     \n"
        "movq    1(%1),            %%mm2  # load v1->v8                 \n"
        "psubb   %%mm2,            %%mm1  # v[i]-v[i+1]                 \n"
        "paddb   mmx_127_thr1,     %%mm1  # + 127-THR1 with wrap        \n"
        "pcmpgtb mmx_127_2xthr1_1, %%mm1  # > 127 -2*thr1 - 1           \n"
        "psadbw  %%mm1,            %%mm0                                \n"
        "movd    %%mm0,            %0     #                             \n"
        "negl    %0                                                     \n"
        "andl    $255,             %0"
        : "=r"(i_eq_cnt)
        : "r"(p_v)
        /* The pixels are read through the pointer, not through an "m"
           operand: tell the compiler so that pending stores to p_v[] are
           completed before the asm runs. */
        : "memory" );

    /* The 9th pair ( v8, v9 ) does not fit in the 8-byte MMX register */
    if( ( ( p_v[8] - p_v[9] + PP_THR1 ) & 0xffff ) <= PP_2xTHR1 )
    {
        i_eq_cnt++;
    }

#if 0
    /* Reference C version of the whole count:
       if( | v[i] - v[i+1] | <= PP_THR1 ) { i_eq_cnt++; } */
    i_eq_cnt = 0;
    for( i =0; i < 9; i++ )
    {
        if(( ( p_v[i] - p_v[i+1] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
        {
            i_eq_cnt++;
        }
    }
#endif

    return( ( i_eq_cnt >= PP_THR2 ) ? 1 : 0 );
}
static
inline
int
pp_deblock_isMinMaxOk
(
uint8_t
*
p_v
,
int
i_QP
)
{
int
i_range
;
__asm__
__volatile__
(
"movq 1(%1), %%mm0 # 8 bytes
\n
"
"movq %%mm0, %%mm1
\n
"
MMXEXT_GET_PMIN
(
%%
mm0
,
%%
mm7
)
MMXEXT_GET_PMAX
(
%%
mm1
,
%%
mm7
)
"psubd %%mm0, %%mm1 # max - min
\n
"
"movd %%mm1, %0
\n
"
"andl $255, %0"
:
"=r"
(
i_range
)
:
"r"
(
p_v
)
);
#if 0
int i_max, i_min;
int i;
i_min = i_max = p_v[1];
for( i = 2; i < 9; i++ )
{
if( i_max < p_v[i] ) i_max = p_v[i];
if( i_min > p_v[i] ) i_min = p_v[i];
}
i_range = i_max - i_min;
#endif
return
(
i_range
<
2
*
i_QP
?
1
:
0
);
}
/* pp_deblock_DefaultMode: default-mode deblocking of one 10-pixel line.
 *
 * i_v      the 10 pixels v0..v9 across the block edge; only i_v[4] and
 *          i_v[5] (the two pixels on either side of the edge) may change.
 * i_stride unused, kept for the callers.
 * i_QP     quantiser of the block; the line is left alone unless
 *          0 < |a3x0| < 8 * i_QP.
 *
 * NOTE(review): the second asm statement relies on mm6 / mm7 still holding
 * the values loaded by the first one, none of the MMX registers are
 * declared as clobbered and no emms is issued -- presumably acceptable
 * because only integer code sits in between; confirm. */
static inline void pp_deblock_DefaultMode( uint8_t i_v[10], int i_stride,
                                           int i_QP )
{
    int d, i_delta;
    int a3x0, a3x0_, a3x1, a3x2;
    int b_neg;

    /* d = CLIP( 5(a3x0' - a3x0)//8, 0, (v4-v5)/2 ).d( abs(a3x0) < QP ) */

    /* First calculate a3x0 = 2 * ( v3 - v6 ) + 5 * ( v5 - v4 ) */
    __asm__ __volatile__ (
        "pxor      %%mm7,         %%mm7  # mm7 = 0           \n"
        "movq      mmx_m2_5_m5_2, %%mm6  # mm6 =(2,-5,5,-2)  \n"
        "movd      3(%1),         %%mm0                      \n"
        "punpcklbw %%mm7,         %%mm0                      \n"
        "pmaddwd   %%mm6,         %%mm0                      \n"
        "pshufw    $0xfe, %%mm0,  %%mm1                      \n"
        "paddd     %%mm1,         %%mm0                      \n"
        "movd      %%mm0,         %0"
        : "=r"(a3x0)
        : "r"(i_v)
        /* i_v[] is read through the pointer: make the compiler finish
           any pending stores to it first */
        : "memory" );
#if 0
    a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 *( i_v[5] - i_v[4] );
#endif

    if( a3x0 < 0 )
    {
        b_neg = 1;
        a3x0  = -a3x0;
    }
    else
    {
        b_neg = 0;
    }
    /* XXX Now a3x0 is abs( a3x0 ) */

    if( ( a3x0 < 8 * i_QP ) && ( a3x0 != 0 ) ) /* |a3x0| < 8*i_QP */
    {
        /* calculate a3x1 and a3x2 */
        __asm__ __volatile__ (
            "                              # mm7 = 0                 \n"
            "                              # mm6 = ( 2, -5, 5, -2 )  \n"
            "movd      1(%2),  %%mm0                                 \n"
            "movd      5(%2),  %%mm2                                 \n"
            "punpcklbw %%mm7,  %%mm0                                 \n"
            "punpcklbw %%mm7,  %%mm2                                 \n"
            "pmaddwd   %%mm6,  %%mm0                                 \n"
            "pmaddwd   %%mm6,  %%mm2                                 \n"
            "pshufw    $0xfe, %%mm0, %%mm1                           \n"
            "paddd     %%mm1,  %%mm0       # mm0 = a3x1              \n"
            "movd      %%mm0,  %0                                    \n"
            "pshufw    $0xfe, %%mm2, %%mm1                           \n"
            "paddd     %%mm1,  %%mm2       # mm2 = a3x2              \n"
            "movd      %%mm2,  %1                                    \n"
            : "=r"(a3x1), "=r"(a3x2)
            : "r"(i_v)
            : "memory" );
#if 0
        a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
        a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
#endif

        if( a3x1 < 0 ) a3x1 = -a3x1; /* abs( a3x1 ) */
        if( a3x2 < 0 ) a3x2 = -a3x2; /* abs( a3x2 ) */

        a3x0_ = PP_MIN3( a3x0, a3x1, a3x2 );

        d = 5 * ( a3x0 - a3x0_ ) / 8; /* always >= 0 */

        i_delta = ( i_v[4] - i_v[5] ) / 2;

        /* clip into [0, i_delta] or [i_delta, 0]; nothing is done when the
           correction and the step across the edge have opposite signs */
        if( i_delta < 0 )
        {
            if( !b_neg ) /* since true d has sgn(d) = - sgn( a3x0 ) */
            {
                d = -d;
                if( d < i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
        else
        {
            if( b_neg )
            {
                if( d > i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
    }
}
/* pp_deblock_DCMode: DC-mode (strong) smoothing of one 10-pixel line.
 *
 * Replaces p_v[1] .. p_v[8] by a 9-tap low-pass of the line with weights
 * ( 1 1 2 2 4 2 2 1 1 ) / 16 (see the reference C code at the end of the
 * function).  Taps that fall before v1 use i_p0 and taps that fall after
 * v8 use i_p9, where i_p0 / i_p9 are the outer pixels v0 / v9 when they are
 * within i_QP of their neighbour, and v1 / v8 otherwise.
 * p_v[0] and p_v[9] are read but never written.
 *
 * NOTE(review): mm0-mm3 and mm5-mm7 are used without being declared as
 * clobbered and no emms is issued -- presumably handled by the callers;
 * confirm. */
static inline void pp_deblock_DCMode( uint8_t *p_v, /* 10 pixels v0..v9 */
                                      int i_QP )
{
    int i_p0, i_p9;

    i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
    i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];

    /* Register usage:
       mm0 = the 8 pixels v1..v8, unmodified
       mm1 = scratch, holds 4 words at a time:
             - for the first part of the 4 first pixels
                 (v1) -> (p0) -> ... ( word )
                 (v2)    (v1)
                 (v3)    (v2)
                 (v4)    (v3)
             - for the part common to the first and last pixels
                 (v2) -> (v3) -> ... ( word )
                 (v3)    (v4)
                 (v4)    (v5)
                 (v5)    (v6)
             - for the last part of the 4 last pixels
                 (v5) -> (v6) -> ... ( word )
                 (v6)    (v7)
                 (v7)    (v8)
                 (v8)    (p9)
       mm2 = accumulator for the 4 first new pixels
       mm3 = accumulator for the 4 last new pixels
       mm4 = unused
       mm5 = p0
       mm6 = p9 << 48
       mm7 = 0 */
    __asm__ __volatile__ (
        "pxor %%mm7, %%mm7 \n"
        "movq 1(%0), %%mm0 # get 8 pix \n"
        " # unpack into mm1 \n"
        "movq %%mm0, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 \n"
        " # get p_0 and i_p9 \n"
        "movd %1, %%mm5 \n"
        "movd %2, %%mm6 \n"
        "psllq $48, %%mm6 \n"
        " \n"
        "movq %%mm1, %%mm3 # p_v[5-8] = v[1-4] !! \n"
        "movq %%mm1, %%mm2 \n"
        "psllw $2, %%mm2 # p_v[1-4] = 4*v[1-4] \n"
        " \n"
        "psllq $16, %%mm1 \n"
        "por %%mm5, %%mm1 # mm1 =( p0, v1, v2 ,v3) \n"
        " \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm2 \n"
        " \n"
        /* pshufw $0x90 selects words ( 0, 0, 1, 2 ): it duplicates the low
           word (p0) and moves the others one place up */
        "pshufw $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, v1, v2) \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm2 \n"
        " \n"
        /* NOTE: the register really holds ( p0, p0, p0, v1 ) here; the
           "v2" in the asm comment below is a typo */
        "pshufw $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, p0, v2) \n"
        "paddw %%mm1, %%mm2 \n"
        " \n"
        "pshufw $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, p0, p0) \n"
        "paddw %%mm1, %%mm2 \n"
        " # Now last part a little borring \n"
        " # last part for mm2, beginig for mm3 \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $8, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v2, v3, v4, v5 ) \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $16, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v3, v4, v5, v6 ) \n"
        "psllw $1, %%mm1 \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $24, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v4, v5, v6, v7) \n"
        "paddw %%mm1, %%mm2 \n"
        "paddw %%mm1, %%mm3 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "movq %%mm0, %%mm1 \n"
        "psrlq $32, %%mm1 \n"
        "punpcklbw %%mm7, %%mm1 # mm1 =( v5, v6, v7, v8) \n"
        "paddw %%mm1, %%mm2 \n"
        "psllw $2, %%mm1 \n"
        "paddw %%mm1, %%mm3 \n"
        " # Now last part for last 4 pix \n"
        " # \n"
        "movq %%mm0, %%mm1 \n"
        "punpckhbw %%mm7, %%mm1 # mm1 = ( v5, v6, v7, v8) \n"
        " \n"
        "psrlq $16, %%mm1 \n"
        "por %%mm6, %%mm1 # mm1 =( v6, v7, v8, p9 ) \n"
        " \n"
        "paddw %%mm1, %%mm3 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        /* pshufw $0xf9 selects words ( 1, 2, 3, 3 ): it duplicates the top
           word (p9) and moves the others one place down */
        "pshufw $0xf9,%%mm1,%%mm1 # mm1 =( v7, v8, p9, p9) \n"
        "paddw %%mm1, %%mm3 \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "pshufw $0xf9,%%mm1,%%mm1 # mm1 =( v8, p9, p9, p9) \n"
        "paddw %%mm1, %%mm3 \n"
        " \n"
        "pshufw $0xf9,%%mm1,%%mm1 # mm1 =( p9, p9, p9, p9) \n"
        "paddw %%mm1, %%mm3 \n"
        /* divide both accumulators by 16, pack to bytes, store v1..v8 */
        "psrlw $4, %%mm2 \n"
        "psrlw $4, %%mm3 \n"
        "packuswb %%mm3, %%mm2 \n"
        "movq %%mm2, 1(%0) \n"
        : : "r"(p_v), "r"(i_p0), "r"(i_p9) : "memory" );

#if 0
    /* Reference C version of the asm above */
    for( i = 1; i < 9; i++ )
    {
        v[i] = p_v[i]; /* save 8 pix that will be modified */
    }
    p_v[1] = ( 6 * i_p0 + 4 * v[1]
    + 2 *( v[2] + v[3]) + v[4] + v[5]) >> 4;
    p_v[2] = ( 4 * i_p0 + 2 * v[1] + 4 * v[2]
    + 2 *( v[3] + v[4]) + v[5] + v[6]) >> 4;
    p_v[3] = ( 2 * i_p0 + 2 * (v[1] + v[2]) + 4 * v[3]
    + 2 *( v[4] + v[5]) + v[6] + v[7]) >> 4;
    p_v[4] = ( i_p0 + v[1] + 2 * (v[2] + v[3]) + 4 * v[4]
    + 2 *( v[5] + v[6]) + v[7] + v[8]) >> 4;
    p_v[5] = ( v[1] + v[2] + 2 * (v[3] + v[4]) + 4 * v[5]
    + 2 *( v[6] + v[7]) + v[8] + i_p9) >> 4;
    p_v[6] = ( v[2] + v[3] + 2 * (v[4] + v[5]) + 4 * v[6]
    + 2 *( v[7] + v[8]) + 2 * i_p9) >> 4;
    p_v[7] = ( v[3] + v[4] + 2 * (v[5] + v[6]) + 4 * v[7]
    + 2 * v[8] + 4 * i_p9) >> 4;
    p_v[8] = ( v[4] + v[5] + 2 * (v[6] + v[7]) + 4 * v[8]
    + 6 * i_p9) >> 4;
#endif
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ---------- filter Vertical lines so follow horizontal edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Deblock across horizontal block edges by filtering vertical pixel runs.
 *
 * For every boundary line y = 8, 16, ... (while y < i_height - 4) and for
 * every column, the 10 pixels of rows y-5 .. y+4 are gathered into a
 * contiguous buffer, handed to the pp_deblock_* helpers, and the 8 inner
 * pixels (rows y-4 .. y+3) are written back to the plane.
 *
 * p_plane      plane filtered in place
 * i_width      number of columns processed
 * i_height     number of lines in the plane
 * i_stride     distance in bytes between two lines of p_plane
 * p_QP_store   quantizer table, indexed with (y >> s) * i_QP_stride + (x >> s)
 * i_QP_stride  line stride of p_QP_store
 * b_chroma     non-zero selects s = 5, zero selects s = 4
 */
void E_( pp_deblock_V )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    /* shift turning a pixel coordinate into a QP store coordinate */
    const int i_shift = b_chroma ? 5 : 4;
    int i_row;

    for( i_row = 8; i_row < i_height - 4; i_row += 8 )
    {
        /* first of the 10 lines straddling the boundary at i_row */
        uint8_t *p_top = p_plane + ( i_row - 5 ) * i_stride;
        int i_col;

        for( i_col = 0; i_col < i_width; i_col++ )
        {
            uint8_t *p_col = p_top + i_col;
            uint8_t pix[10];
            int i_QP;
            int k;

            /* Gather the column so the helpers can work without a stride */
            for( k = 0; k < 10; k++ )
            {
                pix[k] = p_col[k * i_stride];
            }

            /* The quantizer is looked up at the boundary line itself */
            i_QP = p_QP_store[( i_row >> i_shift ) * i_QP_stride
                              + ( i_col >> i_shift )];

            if( !pp_deblock_isDC_mode( pix ) )
            {
                pp_deblock_DefaultMode( pix, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( pix, i_QP ) )
            {
                pp_deblock_DCMode( pix, i_QP );
            }

            /* Only entries 1..8 can have been modified: scatter them back */
            for( k = 1; k < 9; k++ )
            {
                p_col[k * i_stride] = pix[k];
            }
        }
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* --------- filter Horizontal lines so follow vertical edges -------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Deblock across vertical block edges by filtering horizontal pixel runs.
 *
 * On every line, for each boundary column x = 8, 16, ... (while
 * x < i_width - 4), the pp_deblock_* helpers are applied in place to the
 * pixels starting 5 bytes before the boundary.
 *
 * p_plane      plane filtered in place
 * i_width      number of columns in the plane
 * i_height     number of lines processed
 * i_stride     distance in bytes between two lines of p_plane
 * p_QP_store   quantizer table, indexed with (y >> s) * i_QP_stride + (x >> s)
 * i_QP_stride  line stride of p_QP_store
 * b_chroma     non-zero selects s = 5, zero selects s = 4
 */
void E_( pp_deblock_H )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    /* shift turning a pixel coordinate into a QP store coordinate */
    const int i_shift = b_chroma ? 5 : 4;
    int i_row;

    for( i_row = 0; i_row < i_height; i_row++ )
    {
        uint8_t *p_line = p_plane + i_row * i_stride;
        int i_col;

        for( i_col = 8; i_col < i_width - 4; i_col += 8 )
        {
            /* run of pixels beginning 5 bytes before the block boundary */
            uint8_t *p_pix = p_line + ( i_col - 5 );
            /* the quantizer is looked up at the boundary column itself */
            const int i_QP = p_QP_store[( i_row >> i_shift ) * i_QP_stride
                                        + ( i_col >> i_shift )];

            if( !pp_deblock_isDC_mode( p_pix ) )
            {
                pp_deblock_DefaultMode( p_pix, i_stride, i_QP );
            }
            else if( pp_deblock_isMinMaxOk( p_pix, i_QP ) )
            {
                pp_deblock_DCMode( p_pix, i_QP );
            }
        }
    }
}
/*****************************************************************************
*
* Internal functions shared by pp_dering_Y and pp_dering_C
*
*****************************************************************************/
/*
 * Compute the smallest and largest pixel value of a block and store them in
 * *pi_min and *pi_max. The disabled C reference at the end of the body shows
 * the intended scan: 8 lines of 8 pixels, lines i_stride bytes apart.
 *
 * Asm operands: %0 = pi_min, %1 = pi_max, %2 = p_block, %3 = i_stride.
 * Eight lines are visited: p_block itself plus offsets 1..7 times i_stride.
 *
 * MMXEXT_GET_LMINMAX, MMXEXT_GET_PMIN and MMXEXT_GET_PMAX are macros defined
 * outside this function; presumably they fold one line into the running
 * minimum (mm0) / maximum (mm1) and then reduce each register to a single
 * value, using mm7 as scratch -- TODO confirm against the macro definitions.
 *
 * NOTE(review): addresses are formed with leal and %eax, i.e. 32-bit
 * registers -- looks like this is written for 32-bit x86 only; confirm.
 */
static inline void pp_dering_MinMax( uint8_t *p_block, int i_stride,
                                     int *pi_min, int *pi_max )
{
    /* First we will extract min/max for each pix on vertical line
       and next extract global min/max */
    __asm__ __volatile__(
        /* eax = address of line 1 */
        "leal (%2,%3), %%eax \n"
        /* line 0 seeds both the running minimum (mm0) and maximum (mm1) */
        "movq (%2), %%mm0 #load line \n"
        "movq %%mm0, %%mm1 \n"
        /* lines 1, 2, 3 (relative to eax) and line 4 (relative to p_block) */
        MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
        MMXEXT_GET_LMINMAX( (%%eax, %3), %%mm0, %%mm1, %%mm7 )
        MMXEXT_GET_LMINMAX( (%%eax, %3,2), %%mm0, %%mm1, %%mm7 )
        MMXEXT_GET_LMINMAX( (%2, %3, 4), %%mm0, %%mm1, %%mm7 )
        /* eax = address of line 5 */
        "leal (%%eax,%3,4), %%eax \n"
        /* lines 5, 6, 7 */
        MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
        MMXEXT_GET_LMINMAX( (%%eax, %3), %%mm0, %%mm1, %%mm7 )
        MMXEXT_GET_LMINMAX( (%%eax, %3,2), %%mm0, %%mm1, %%mm7 )
        MMXEXT_GET_PMIN( %%mm0, %%mm7 )
        MMXEXT_GET_PMAX( %%mm1, %%mm7 )
        /* keep only the low byte of each result and store it as an int */
        "movd %%mm0, %%eax \n"
        "andl $255, %%eax \n"
        "movl %%eax, (%0) \n"
        "movd %%mm1, %%eax \n"
        "andl $255, %%eax \n"
        "movl %%eax, (%1) \n"
        /* no outputs: results are written through the pointers, hence the
           "memory" clobber */
        : : "r"(pi_min), "r"(pi_max), "r"(p_block), "r"(i_stride)
        : "%eax", "memory" );
    /* Disabled plain C reference implementation */
#if 0
    i_min = 255; i_max = 0;
    for( y = 0; y < 8; y++ )
    {
        for( x = 0; x < 8; x++ )
        {
            if( i_min > p_block[x] ) i_min = p_block[x];
            if( i_max < p_block[x] ) i_max = p_block[x];
        }
        p_block += i_stride;
    }
    *pi_min = i_min;
    *pi_max = i_max;
#endif
}
/*
 * Build the binary index of a 10x10 pixel area for the dering filter.
 *
 * For each of the 10 lines starting at p_block (lines i_stride bytes apart)
 * a 10-bit mask is built in which bit x is set when p_block[x] <= i_thr.
 * The complement of that mask is copied into the upper 16 bits, so that runs
 * of pixels above the threshold are tracked as well. The value stored in
 * p_bin[line] has a bit set only where that bit and both of its horizontal
 * neighbours are set, i.e. at the centre of three consecutive pixels lying
 * on the same side of the threshold.
 *
 * p_block   top-left pixel of the 10x10 area
 * i_stride  distance in bytes between two lines
 * i_thr     threshold; only its low byte is used by the MMX comparison
 * p_bin     output array, 10 entries are written
 *
 * The masks are combined with bitwise AND. The previous logical "&&"
 * collapsed every line to 0 or 1, so no pixel was ever selected for
 * smoothing by pp_dering_Filter.
 */
static inline void pp_dering_BinIndex( uint8_t *p_block, int i_stride,
                                       int i_thr, uint32_t *p_bin )
{
    int y;
    uint32_t i_bin;

    /* mm7 = i_thr replicated in every byte, mm6 = 0.
       Both registers are reused by the per-line asm statement below. */
    __asm__ __volatile__(
        "movl %0, %%eax \n"
        "movb %%al, %%ah \n"
        "movd %%eax, %%mm7 \n"
        "pshufw $0x00, %%mm7, %%mm7 \n"
        "pxor %%mm6, %%mm6 \n"
        : : "r"(i_thr) : "%eax" );

    for( y = 0; y < 10; y++ )
    {
        /* First 8 pixels of the line in one go */
        __asm__ __volatile__(
            "movq (%1), %%mm0 \n"
            "psubusb %%mm7, %%mm0 \n"   /* saturation: x <= thr --> 0 */
            "pcmpeqb %%mm6, %%mm0 \n"   /* p_block <= i_thr ? -1 : 0 */
            "pmovmskb %%mm0, %0 \n"     /* i_bin = msb of each byte */
            : "=r"(i_bin)
            : "r"(p_block)
            : "memory" );               /* the asm reads *p_block */

        /* Remaining 2 pixels of the line */
        if( p_block[8] <= i_thr ) i_bin |= 1 << 8;
        if( p_block[9] <= i_thr ) i_bin |= 1 << 9;

        /* Upper half holds the complement: detects three 0 as well as
           three 1 */
        i_bin |= (~i_bin) << 16;

        *p_bin = ( i_bin >> 1 ) & i_bin & ( i_bin << 1 );

        p_block += i_stride;
        p_bin++;
    }
}
/*
 * Adaptive smoothing step of the dering filter for one 8x8 block.
 *
 * p_block   top-left pixel of the 8x8 block; the one-pixel border around it
 *           is read as well
 * i_stride  distance in bytes between two lines
 * p_bin     10 line masks; entries row, row+1 and row+2 are ANDed for each
 *           block line, and bits 1..8 of the result (after merging the
 *           upper 16 bits into the lower ones) select the pixels to smooth
 * i_QP      quantizer; every output pixel is kept within +/- (i_QP >> 1)
 *           of the original pixel, using saturating byte arithmetic
 *
 * Selected pixels are replaced by the rounded 3x3 weighted mean
 *      1 2 1
 *      2 4 2   ( + 8 ) >> 4
 *      1 2 1
 * All 64 results are computed from unmodified pixels into a temporary
 * buffer first, then clamped and written back line by line.
 */
static inline void pp_dering_Filter( uint8_t *p_block, int i_stride,
                                     uint32_t *p_bin, int i_QP )
{
    uint8_t smoothed[8][8];
    uint8_t *p_out = p_block;   /* write-back cursor, starts at line 0 */
    int row, col;

    for( row = 0; row < 8; row++ )
    {
        /* a pixel qualifies when it is flagged on three consecutive lines */
        uint32_t i_mask = p_bin[row] & p_bin[row + 1] & p_bin[row + 2];

        /* fold the "three 0" half onto the "three 1" half */
        i_mask |= i_mask >> 16;

        for( col = 0; col < 8; col++, i_mask >>= 1 )
        {
            /* bit 1, not bit 0: the masks cover 10 columns, we want 1-8 */
            if( i_mask & 0x02 )
            {
                const uint8_t *p_c = p_block + col;
                int i_sum;

                i_sum = p_c[-i_stride - 1] + 2 * p_c[-i_stride] + p_c[-i_stride + 1]
                      + 2 * p_c[-1]        + 4 * p_c[0]         + 2 * p_c[1]
                      + p_c[i_stride - 1]  + 2 * p_c[i_stride]  + p_c[i_stride + 1];

                smoothed[row][col] = ( i_sum + 8 ) >> 4;
            }
            else
            {
                smoothed[row][col] = p_block[col];
            }
        }
        p_block += i_stride;
    }

    /* mm7 = i_QP / 2 replicated in every byte */
    __asm__ __volatile__(
        "movl %0, %%eax \n"
        "shrl $1, %%eax \n"             /* i_QP/2 */
        "movb %%al, %%ah \n"
        "movd %%eax, %%mm7 \n"
        "pshufw $0x00, %%mm7, %%mm7 \n"
        : : "r"(i_QP) : "%eax" );

    for( row = 0; row < 8; row++ )
    {
        /* clamp the smoothed line around the original one and store it */
        __asm__ __volatile__(
            "movq (%0), %%mm0 \n"       /* mm0 = smoothed[row][0..7] */
            "movq (%1), %%mm1 \n"       /* mm1 = original pixels */
            "movq %%mm1, %%mm2 \n"
            "psubusb %%mm7, %%mm1 \n"   /* mm1 = orig - i_QP/2 ( >= 0 ) */
            "paddusb %%mm7, %%mm2 \n"   /* mm2 = orig + i_QP/2 ( <= 255 ) */
            "pmaxub %%mm1, %%mm0 \n"    /* orig - i_QP/2 <= mm0 */
            "pminub %%mm2, %%mm0 \n"    /* mm0 <= orig + i_QP/2 */
            "movq %%mm0, (%1) \n"
            : : "r"(smoothed[row]), "r"(p_out) : "memory" );

        p_out += i_stride;
    }
}
/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/* */
/* ----------------- Dering filter on Y and C blocks ----------------- */
/* */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Dering filter for a luma plane.
 *
 * The plane is walked in groups of four 8x8 blocks (a 16x16 area) starting
 * at pixel (8,8):
 *      +-+-+
 *      |0|1|
 *      +-+-+
 *      |2|3|
 *      +-+-+
 * For each group: (1) min/max and a threshold are computed per block and
 * the thresholds are rearranged, (2) the binary index of the 10x10 area
 * around each block is built, (3) each block is adaptively smoothed with
 * its own quantizer.
 *
 * p_plane      plane filtered in place
 * i_width      plane width; columns 8 .. i_width - 9 are block origins
 * i_height     plane height; lines 8 .. i_height - 9 are block origins
 * i_stride     distance in bytes between two lines of p_plane
 * p_QP_store   quantizer table, indexed with (y >> 4) * i_QP_stride + (x >> 4)
 * i_QP_stride  line stride of p_QP_store
 *
 * Fixes over the previous version:
 *  - the threshold rearrangement iterated k = 1..4 over 4-element arrays,
 *    reading and writing one element past the end and never touching
 *    block 0; it now iterates k = 0..3;
 *  - the block pointers advanced by 8 bytes per iteration while x advanced
 *    by 16, so pixels and quantizers went out of step and only half of the
 *    width was covered; they now advance by 16.
 */
void E_( pp_dering_Y )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    int x, y, k;
    int i_max[4], i_min[4], i_range[4];
    int i_thr[4];
    int i_max_range, i_kmax, i_thr_kmax;
    uint32_t i_bin[4][10];
    uint8_t *p_block[4];
    QT_STORE_T *p_QP;

    /* We process 4 blocks/loop */
    for( y = 8; y < i_height - 8; y += 16 )
    {
        p_block[0] = p_plane + y * i_stride + 8;
        p_block[1] = p_block[0] + 8;
        p_block[2] = p_block[0] + ( i_stride << 3 );
        p_block[3] = p_block[2] + 8;

        for( x = 8; x < i_width - 8; x += 16 )
        {
            /* 1: Calculate threshold */
            /* Calculate max/min for each block */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_MinMax( p_block[k], i_stride,
                                  &i_min[k], &i_max[k] );
            }

            /* Calculate range, max_range and thr.
             * NOTE(review): the selection below compares block maxima, not
             * ranges, although the variable is named i_max_range -- left
             * unchanged, confirm the intent against the MPEG-4 deringing
             * description before altering it. */
            i_max_range = 0;
            i_kmax = 0;
            for( k = 0; k < 4; k++ )
            {
                i_range[k] = i_max[k] - i_min[k];
                i_thr[k]   = ( i_max[k] + i_min[k] + 1 ) / 2;
                if( i_max_range < i_max[k] )
                {
                    i_max_range = i_max[k];
                    i_kmax = k;
                }
            }

            /* Now rearrange thr. The reference threshold is latched first
             * so that resetting i_thr[i_kmax] cannot leak into the other
             * blocks. */
            i_thr_kmax = i_thr[i_kmax];
            for( k = 0; k < 4; k++ )
            {
                if( i_range[k] < 16 )
                {
                    i_thr[k] = 0;
                }
                else if( i_max_range > 64 && i_range[k] < 32 )
                {
                    i_thr[k] = i_thr_kmax;
                }
            }

            /* 2: Index acquisition 10x10 ! so " -i_stride - 1" */
            for( k = 0; k < 4; k++ )
            {
                pp_dering_BinIndex( p_block[k] - i_stride - 1, i_stride,
                                    i_thr[k], i_bin[k] );
            }

            /* 3: adaptive smoothing */
            /* since we begin at (8,8) QP can be different for each block */
            p_QP = &( p_QP_store[( y >> 4 ) * i_QP_stride + ( x >> 4 )] );

            pp_dering_Filter( p_block[0], i_stride, i_bin[0],
                              p_QP[0] );
            pp_dering_Filter( p_block[1], i_stride, i_bin[1],
                              p_QP[1] );
            pp_dering_Filter( p_block[2], i_stride, i_bin[2],
                              p_QP[i_QP_stride] );
            pp_dering_Filter( p_block[3], i_stride, i_bin[3],
                              p_QP[i_QP_stride + 1] );

            /* next group of four blocks: 16 pixels to the right, like x */
            for( k = 0; k < 4; k++ )
            {
                p_block[k] += 16;
            }
        }
    }
}
/*
 * Dering filter for a chroma plane, one 8x8 block at a time.
 *
 * Blocks start at pixel (8,8) and are visited while x < i_width - 8 and
 * y < i_height - 8. For each block the threshold is the rounded mean of its
 * min and max, the binary index is taken on the 10x10 area surrounding the
 * block, and the block is then adaptively smoothed.
 *
 * p_plane      plane filtered in place
 * i_width      plane width
 * i_height     plane height
 * i_stride     distance in bytes between two lines of p_plane
 * p_QP_store   quantizer table, indexed with (y >> 5) * i_QP_stride + (x >> 5)
 * i_QP_stride  line stride of p_QP_store
 */
void E_( pp_dering_C )( uint8_t *p_plane,
                        int i_width, int i_height, int i_stride,
                        QT_STORE_T *p_QP_store, int i_QP_stride )
{
    uint32_t i_bin[10];
    int i_row;

    for( i_row = 8; i_row < i_height - 8; i_row += 8 )
    {
        uint8_t *p_blk = p_plane + i_row * i_stride + 8;
        int i_col;

        for( i_col = 8; i_col < i_width - 8; i_col += 8, p_blk += 8 )
        {
            int i_lo, i_hi;
            int i_thr;

            /* 1: threshold = rounded mean of the block extrema */
            pp_dering_MinMax( p_blk, i_stride, &i_lo, &i_hi );
            i_thr = ( i_hi + i_lo + 1 ) / 2;

            /* 2: index acquisition on the 10x10 area which contains
                  our 8x8 block */
            pp_dering_BinIndex( p_blk - i_stride - 1, i_stride,
                                i_thr, i_bin );

            /* 3: adaptive smoothing */
            pp_dering_Filter( p_blk, i_stride, i_bin,
                              p_QP_store[( i_row >> 5 ) * i_QP_stride
                                         + ( i_col >> 5 )] );
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment