Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
L
libva
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
libva
Commits
20975a94
Commit
20975a94
authored
Sep 02, 2010
by
Xiang, Haihao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
i965_drv_video: add video processing kernels
parent
5f2030ba
Changes
109
Show whitespace changes
Inline
Side-by-side
Showing
109 changed files
with
7730 additions
and
1 deletion
+7730
-1
configure.ac
configure.ac
+1
-0
i965_drv_video/shaders/Makefile.am
i965_drv_video/shaders/Makefile.am
+1
-1
i965_drv_video/shaders/post_processing/Common/AYUV_Load_16x8.asm
...v_video/shaders/post_processing/Common/AYUV_Load_16x8.asm
+53
-0
i965_drv_video/shaders/post_processing/Common/AYUV_Load_16x8.inc
...v_video/shaders/post_processing/Common/AYUV_Load_16x8.inc
+43
-0
i965_drv_video/shaders/post_processing/Common/Expansion.inc
i965_drv_video/shaders/post_processing/Common/Expansion.inc
+31
-0
i965_drv_video/shaders/post_processing/Common/IMC3_Load_8x4.asm
...rv_video/shaders/post_processing/Common/IMC3_Load_8x4.asm
+47
-0
i965_drv_video/shaders/post_processing/Common/IMC3_Load_8x5.asm
...rv_video/shaders/post_processing/Common/IMC3_Load_8x5.asm
+47
-0
i965_drv_video/shaders/post_processing/Common/IMC3_Load_9x5.asm
...rv_video/shaders/post_processing/Common/IMC3_Load_9x5.asm
+50
-0
i965_drv_video/shaders/post_processing/Common/Init_All_Regs.asm
...rv_video/shaders/post_processing/Common/Init_All_Regs.asm
+18
-0
i965_drv_video/shaders/post_processing/Common/Multiple_Loop.asm
...rv_video/shaders/post_processing/Common/Multiple_Loop.asm
+84
-0
i965_drv_video/shaders/post_processing/Common/Multiple_Loop_Head.asm
...deo/shaders/post_processing/Common/Multiple_Loop_Head.asm
+23
-0
i965_drv_video/shaders/post_processing/Common/NV11_Load_4x8.asm
...rv_video/shaders/post_processing/Common/NV11_Load_4x8.asm
+42
-0
i965_drv_video/shaders/post_processing/Common/NV11_Load_5x8.asm
...rv_video/shaders/post_processing/Common/NV11_Load_5x8.asm
+42
-0
i965_drv_video/shaders/post_processing/Common/NV12_Load_8x4.asm
...rv_video/shaders/post_processing/Common/NV12_Load_8x4.asm
+42
-0
i965_drv_video/shaders/post_processing/Common/NV12_Load_8x5.asm
...rv_video/shaders/post_processing/Common/NV12_Load_8x5.asm
+42
-0
i965_drv_video/shaders/post_processing/Common/NV12_Load_9x5.asm
...rv_video/shaders/post_processing/Common/NV12_Load_9x5.asm
+42
-0
i965_drv_video/shaders/post_processing/Common/P208_Load_8x8.asm
...rv_video/shaders/post_processing/Common/P208_Load_8x8.asm
+41
-0
i965_drv_video/shaders/post_processing/Common/P208_Load_9x8.asm
...rv_video/shaders/post_processing/Common/P208_Load_9x8.asm
+42
-0
i965_drv_video/shaders/post_processing/Common/PA_Load.inc
i965_drv_video/shaders/post_processing/Common/PA_Load.inc
+42
-0
i965_drv_video/shaders/post_processing/Common/PA_Load_8x8.asm
..._drv_video/shaders/post_processing/Common/PA_Load_8x8.asm
+33
-0
i965_drv_video/shaders/post_processing/Common/PA_Load_9x8.asm
..._drv_video/shaders/post_processing/Common/PA_Load_9x8.asm
+47
-0
i965_drv_video/shaders/post_processing/Common/PL16x8_PL8x4.asm
...drv_video/shaders/post_processing/Common/PL16x8_PL8x4.asm
+38
-0
i965_drv_video/shaders/post_processing/Common/PL16x8_PL8x8.asm
...drv_video/shaders/post_processing/Common/PL16x8_PL8x8.asm
+36
-0
i965_drv_video/shaders/post_processing/Common/PL2_Load.inc
i965_drv_video/shaders/post_processing/Common/PL2_Load.inc
+78
-0
i965_drv_video/shaders/post_processing/Common/PL3_Load.inc
i965_drv_video/shaders/post_processing/Common/PL3_Load.inc
+59
-0
i965_drv_video/shaders/post_processing/Common/PL4x8_Save_NV11.asm
..._video/shaders/post_processing/Common/PL4x8_Save_NV11.asm
+86
-0
i965_drv_video/shaders/post_processing/Common/PL4x8_Save_NV11.inc
..._video/shaders/post_processing/Common/PL4x8_Save_NV11.inc
+60
-0
i965_drv_video/shaders/post_processing/Common/PL5x8_PL16x8.asm
...drv_video/shaders/post_processing/Common/PL5x8_PL16x8.asm
+29
-0
i965_drv_video/shaders/post_processing/Common/PL5x8_PL8x8.asm
..._drv_video/shaders/post_processing/Common/PL5x8_PL8x8.asm
+21
-0
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_IMC3.asm
..._video/shaders/post_processing/Common/PL8x4_Save_IMC3.asm
+88
-0
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_IMC3.inc
..._video/shaders/post_processing/Common/PL8x4_Save_IMC3.inc
+62
-0
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_NV12.asm
..._video/shaders/post_processing/Common/PL8x4_Save_NV12.asm
+102
-0
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_NV12.inc
..._video/shaders/post_processing/Common/PL8x4_Save_NV12.inc
+85
-0
i965_drv_video/shaders/post_processing/Common/PL8x5_PL8x8.asm
..._drv_video/shaders/post_processing/Common/PL8x5_PL8x8.asm
+27
-0
i965_drv_video/shaders/post_processing/Common/PL8x8_PL8x4.asm
..._drv_video/shaders/post_processing/Common/PL8x8_PL8x4.asm
+30
-0
i965_drv_video/shaders/post_processing/Common/PL8x8_PL8x4.inc
..._drv_video/shaders/post_processing/Common/PL8x8_PL8x4.inc
+36
-0
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_P208.asm
..._video/shaders/post_processing/Common/PL8x8_Save_P208.asm
+56
-0
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_P208.inc
..._video/shaders/post_processing/Common/PL8x8_Save_P208.inc
+61
-0
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_PA.asm
...rv_video/shaders/post_processing/Common/PL8x8_Save_PA.asm
+71
-0
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_PA.inc
...rv_video/shaders/post_processing/Common/PL8x8_Save_PA.inc
+52
-0
i965_drv_video/shaders/post_processing/Common/PL9x5_PL16x8.asm
...drv_video/shaders/post_processing/Common/PL9x5_PL16x8.asm
+37
-0
i965_drv_video/shaders/post_processing/Common/PL9x8_PL16x8.asm
...drv_video/shaders/post_processing/Common/PL9x8_PL16x8.asm
+21
-0
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB.asm
...video/shaders/post_processing/Common/RGB16x8_Save_RGB.asm
+88
-0
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB.inc
...video/shaders/post_processing/Common/RGB16x8_Save_RGB.inc
+38
-0
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB16.asm
...deo/shaders/post_processing/Common/RGB16x8_Save_RGB16.asm
+72
-0
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB16.inc
...deo/shaders/post_processing/Common/RGB16x8_Save_RGB16.inc
+49
-0
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_Y416.asm
...ideo/shaders/post_processing/Common/RGB16x8_Save_Y416.asm
+107
-0
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_Y416.inc
...ideo/shaders/post_processing/Common/RGB16x8_Save_Y416.inc
+38
-0
i965_drv_video/shaders/post_processing/Common/RGB_Pack.asm
i965_drv_video/shaders/post_processing/Common/RGB_Pack.asm
+40
-0
i965_drv_video/shaders/post_processing/Common/SetupVPKernel.asm
...rv_video/shaders/post_processing/Common/SetupVPKernel.asm
+34
-0
i965_drv_video/shaders/post_processing/Common/common.inc
i965_drv_video/shaders/post_processing/Common/common.inc
+610
-0
i965_drv_video/shaders/post_processing/Common/readSampler16x1.asm
..._video/shaders/post_processing/Common/readSampler16x1.asm
+55
-0
i965_drv_video/shaders/post_processing/Common/undefall.inc
i965_drv_video/shaders/post_processing/Common/undefall.inc
+65
-0
i965_drv_video/shaders/post_processing/Core_Kernels/AVS_IEF.inc
...rv_video/shaders/post_processing/Core_Kernels/AVS_IEF.inc
+108
-0
i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm
...ders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm
+35
-0
i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm
...ers/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm
+27
-0
i965_drv_video/shaders/post_processing/Core_Kernels/DI.inc
i965_drv_video/shaders/post_processing/Core_Kernels/DI.inc
+194
-0
i965_drv_video/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm
...deo/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm
+24
-0
i965_drv_video/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm
...video/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm
+56
-0
i965_drv_video/shaders/post_processing/Core_Kernels/DNDI.inc
i965_drv_video/shaders/post_processing/Core_Kernels/DNDI.inc
+162
-0
i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm
...deo/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm
+17
-0
i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm
...o/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm
+20
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm
.../shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm
+26
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm
...o/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm
+25
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm
...o/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm
+25
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm
...haders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm
+34
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
...s/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
+288
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
...rs/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
+77
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
...rs/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
+93
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm
...ideo/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm
+139
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm
..._video/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm
+54
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PA_Scaling.asm
...video/shaders/post_processing/Core_Kernels/PA_Scaling.asm
+70
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm
...shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm
+60
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm
.../shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm
+58
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm
.../shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm
+57
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
.../post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
+271
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
...s/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
+45
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
...s/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
+53
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_Scaling.asm
...ideo/shaders/post_processing/Core_Kernels/PL2_Scaling.asm
+71
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm
...shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm
+69
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm
.../shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm
+60
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm
.../shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm
+60
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
.../post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
+240
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
...s/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
+45
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
...s/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
+44
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_Scaling.asm
...ideo/shaders/post_processing/Core_Kernels/PL3_Scaling.asm
+72
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm
...ideo/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm
+85
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
.../post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
+103
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
.../post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
+103
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
.../post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
+101
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
...s/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
+106
-0
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DN_ALG.asm
..._video/shaders/post_processing/Core_Kernels/PL_DN_ALG.asm
+35
-0
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_16x8.asm
...shaders/post_processing/Core_Kernels/RGB_AVS_IEF_16x8.asm
+33
-0
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
.../post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
+251
-0
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
...t_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
+260
-0
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
...ideo/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
+72
-0
i965_drv_video/shaders/post_processing/Core_Kernels/Scaling.inc
...rv_video/shaders/post_processing/Core_Kernels/Scaling.inc
+75
-0
i965_drv_video/shaders/post_processing/Makefile.am
i965_drv_video/shaders/post_processing/Makefile.am
+28
-0
i965_drv_video/shaders/post_processing/null.g4a
i965_drv_video/shaders/post_processing/null.g4a
+3
-0
i965_drv_video/shaders/post_processing/null.g4b
i965_drv_video/shaders/post_processing/null.g4b
+1
-0
i965_drv_video/shaders/post_processing/null.g4b.gen5
i965_drv_video/shaders/post_processing/null.g4b.gen5
+1
-0
i965_drv_video/shaders/post_processing/nv12_avs_nv12.asm
i965_drv_video/shaders/post_processing/nv12_avs_nv12.asm
+19
-0
i965_drv_video/shaders/post_processing/nv12_avs_nv12.g4b.gen5
..._drv_video/shaders/post_processing/nv12_avs_nv12.g4b.gen5
+162
-0
i965_drv_video/shaders/post_processing/nv12_dndi_nv12.asm
i965_drv_video/shaders/post_processing/nv12_dndi_nv12.asm
+18
-0
i965_drv_video/shaders/post_processing/nv12_dndi_nv12.g4b.gen5
...drv_video/shaders/post_processing/nv12_dndi_nv12.g4b.gen5
+86
-0
i965_drv_video/shaders/post_processing/nv12_load_save_nv12.asm
...drv_video/shaders/post_processing/nv12_load_save_nv12.asm
+17
-0
i965_drv_video/shaders/post_processing/nv12_load_save_nv12.g4b.gen5
...ideo/shaders/post_processing/nv12_load_save_nv12.g4b.gen5
+106
-0
i965_drv_video/shaders/post_processing/nv12_scaling_nv12.asm
i965_drv_video/shaders/post_processing/nv12_scaling_nv12.asm
+20
-0
i965_drv_video/shaders/post_processing/nv12_scaling_nv12.g4b.gen5
..._video/shaders/post_processing/nv12_scaling_nv12.g4b.gen5
+222
-0
No files found.
configure.ac
View file @
20975a94
...
...
@@ -163,6 +163,7 @@ AC_OUTPUT([
i965_drv_video/shaders/mpeg2/Makefile
i965_drv_video/shaders/mpeg2/vld/Makefile
i965_drv_video/shaders/render/Makefile
i965_drv_video/shaders/post_processing/Makefile
test/Makefile
test/basic/Makefile
test/decode/Makefile
...
...
i965_drv_video/shaders/Makefile.am
View file @
20975a94
SUBDIRS
=
h264 mpeg2 render
SUBDIRS
=
h264 mpeg2 render
post_processing
i965_drv_video/shaders/post_processing/Common/AYUV_Load_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: AYUV_Load_16x8.asm
//----------------------------------------------------------------
//
// Purpose: read one 16x8-pixel block of packed AYUV (64 bytes x 8 rows) with
// two 32-byte-wide data port reads, then split it into separate Y, U and V
// rows in the destination region.
//
// NOTE: instruction order matters here - the shl reads acc0 directly after
// the add, and the NoDDClr/NoDDChk hints pair adjacent writes to rMSGSRC.

#include "AYUV_Load_16x8.inc"

// In order to load 64x8 AYUV data (16x8 pixels), we need to divide the data
// into two regions and load them separately.
//
//       32 byte           32 byte
//|----------------|----------------|
//|                |                |
//|       A        |       B        | 8
//|                |                |
//|                |                |
//|----------------|----------------|

// Load the first 32x8 data block
// Packed data block should be loaded as 32x8 pixel block
    add  (2)    rMSGSRC.0<1>:d      wORIX<2;2,1>:w      wSRC_H_ORI_OFFSET<2;2,1>:w      // Source block origin (X, Y)
    shl  (1)    rMSGSRC.0<1>:d      acc0:w              2:w             { NoDDClr }     // H. block origin needs to be four times larger; reads acc0, presumably the X sum left by the add above - TODO confirm
    mov  (1)    rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_YUV:ud              { NoDDChk }     // Block width and height (32x8)
    mov  (8)    mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud                                   // Copy message description to message header
    send (8)    udSRC_YUV(0)<1>     mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_CURRENT_SRC_YUV:ud

// Load the second 32x8 data block
// Offset the origin X - move to the next 32 byte columns
    add  (1)    rMSGSRC.0<1>:d      rMSGSRC.0<0;1,0>:d  32:w                            // Increase X origin by 32 bytes (8 AYUV pixels)

// Size stays the same - 32x8
    mov  (8)    mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud                                   // Copy message description to message header
    send (8)    udSRC_YUV(8)<1>     mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_CURRENT_SRC_YUV:ud

// Give AYUV region addresses to address register
// The dword immediate packs two 16-bit values, 0x0038*32 (low) and 0x0040*32 (high).
    mov  (1)    SRC_YUV_OFFSET<1>:ud    0x00400038*32:ud                                // Address registers contain starting addresses of two halves

// Directly move the data to destination
// For each row, every 4th byte is picked starting at byte offset 2, 1 and 0
// of the row to produce the Y, U and V rows respectively.
$for(0; <nY_NUM_OF_ROWS; 1) {
    mov  (16)   uwDEST_Y(%1)<1>     r[SRC_YUV_OFFSET,%1*32+2]<8,4>:ub                   // byte 2 of each pixel -> Y
    mov  (16)   uwDEST_U(%1)<1>     r[SRC_YUV_OFFSET,%1*32+1]<8,4>:ub                   // byte 1 of each pixel -> U
    mov  (16)   uwDEST_V(%1)<1>     r[SRC_YUV_OFFSET,%1*32+0]<8,4>:ub                   // byte 0 of each pixel -> V
}
\ No newline at end of file
i965_drv_video/shaders/post_processing/Common/AYUV_Load_16x8.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: AYUV_Load_16x8.inc
//
// AYUV data are first loaded to bottom I/O REGION_2, then unpacked to planar data
// and stored in top I/O REGION_1
//
// This include only defines names used by AYUV_Load_16x8.asm: block/message
// sizes, a temporary register, and aliases that map the generic SRC_*/DEST_*
// names onto the bottom (source) and top (destination) I/O regions.
#undef nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8 // Number of Y rows per block
#define nDPR_BLOCK_SIZE_YUV nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8 // Y block size 32x8
#define nDPR_MSG_SIZE_YUV nRESLEN_8 // # of MRF's to hold Y block data (8)
//Temporary storage for unpacked AYUV data
#define rUNPACK_TEMP REG(r,nTEMP0)
// Two typed views (dword and byte) of the same temporary register.
.declare udUNPACK_TEMP Base=rUNPACK_TEMP ElementSize=4 SrcRegion=<8;8,1> Type=ud //1 GRF
.declare ubUNPACK_TEMP Base=rUNPACK_TEMP ElementSize=1 SrcRegion=<32;32,1> Type=ub //1 GRF
// Byte view of the bottom Y I/O region.
.declare ubBOT_Y_IO Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(32,1) Type=ub
// Source aliases: packed AYUV lands in the bottom Y I/O region.
// NOTE(review): udBOT_Y_IO is not declared in this file - presumably it comes from a shared include; confirm.
#define udSRC_YUV udBOT_Y_IO
#define ubSRC_YUV ubBOT_Y_IO
#define nSRC_YUV_REG nBOT_Y
// Destination aliases: unpacked planes go to the top I/O region.
#define uwDEST_Y uwTOP_Y
#define uwDEST_U uwTOP_U
#define uwDEST_V uwTOP_V
// Address register used for indirect reads of the loaded data.
#define SRC_YUV_OFFSET a0.0
#define nSRC_REGION nREGION_1 // REGION_1 will be the source region for first kernel
// End of AYUV_Load_16x8.inc
i965_drv_video/shaders/post_processing/Common/Expansion.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: Expansion.inc
//
// Selects the U/V row count and the U/V destination aliases for the
// expansion modules, based on EXPAND_9x5 and nSRC_REGION.
// Number of U/V rows per block definition
#undef nUV_NUM_OF_ROWS
#ifdef EXPAND_9x5
#define nUV_NUM_OF_ROWS 6
#else
#define nUV_NUM_OF_ROWS 8
#endif
// Source/destination region definitions
// The aliases are dropped first so that an earlier definition cannot leak through.
#undef uwDEST_U
#undef uwDEST_V
#if (nSRC_REGION==nREGION_1)
#define uwDEST_U uwTOP_U
#define uwDEST_V uwTOP_V
#elif (nSRC_REGION==nREGION_2)
#define uwDEST_U uwBOT_U
#define uwDEST_V uwBOT_V
#endif
// NOTE: when nSRC_REGION matches neither region, uwDEST_U/uwDEST_V stay undefined.
// End of Expansion.inc
i965_drv_video/shaders/post_processing/Common/IMC3_Load_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: IMC3_Load_8x4.asm
//
// Purpose: read a planar block (Y unless LOAD_UV_ONLY is defined, then U and V)
// through the data port and convert the loaded rows to word-aligned format.
// Sizes, SRC_*/DEST_* aliases and row counts come from PL3_Load.inc, which is
// selected by the IMC3_LOAD_8x4 define below.
//----------------------------------------------------------------

#define IMC3_LOAD_8x4
#include "PL3_Load.inc"

// Load 16x8 planar Y ----------------------------------------------------------
// The origin add is outside the LOAD_UV_ONLY guard because the U/V origin
// below is derived from rMSGSRC.0.
    add  (2)    rMSGSRC.0<1>:d      wORIX<2;2,1>:w      wSRC_H_ORI_OFFSET<2;2,1>:w      // Source Y Block origin
#if !defined(LOAD_UV_ONLY)
    mov  (1)    rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud                                // Block width and height (16x8)
    mov  (8)    mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud                                   // Copy message description to Y message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// Load 8x4 planar U and V -----------------------------------------------------
    asr  (2)    rMSGSRC.0<1>:d      rMSGSRC.0<2;2,1>:d  1:w                             // U/V block origin should be half of Y's
    mov  (1)    rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud                               // U/V block width and height (8x4)
    mov  (8)    mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud                                   // Header for the U read
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_U:ud
    mov  (8)    mMSGHDRV<1>:ud      rMSGSRC<8;8,1>:ud                                   // Header for the V read
    // Send from mMSGHDRV, the header populated by the mov directly above
    // (same contents as the U header; only the surface index differs).
    send (8)    udSRC_V(0)<1>       mMSGHDRV    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_V:ud

// Convert to word-aligned format ----------------------------------------------
// Both loops run from the last row down to row 0.
// NOTE(review): presumably so an in-place byte->word expansion never overwrites
// rows that are still unread - confirm against the region layout in PL3_Load.inc.
#if !defined(LOAD_UV_ONLY)
    $for (nY_NUM_OF_ROWS-1; >-1; -1) {
        mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
    }
#endif
    $for (nUV_NUM_OF_ROWS/2-1; >-1; -1) {
        mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*16)
        mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_V(0,%1*16)
    }

// End of IMC3_Load_8x4
i965_drv_video/shaders/post_processing/Common/IMC3_Load_8x5.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: IMC3_Load_8x5.asm
//
// Purpose: read a planar block (Y unless LOAD_UV_ONLY is defined, then U and V)
// through the data port and convert the loaded rows to word-aligned format.
// Sizes, SRC_*/DEST_* aliases and row counts come from PL3_Load.inc, which is
// selected by the IMC3_LOAD_8x5 define below.
//----------------------------------------------------------------

#define IMC3_LOAD_8x5
#include "PL3_Load.inc"

// Load 16x8 planar Y ----------------------------------------------------------
// The origin add is outside the LOAD_UV_ONLY guard because the U/V origin
// below is derived from rMSGSRC.0.
    add  (2)    rMSGSRC.0<1>:d      wORIX<2;2,1>:w      wSRC_H_ORI_OFFSET<2;2,1>:w      // Source Y Block origin
#if !defined(LOAD_UV_ONLY)
    mov  (1)    rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud                                // Block width and height (16x8)
    mov  (8)    mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud                                   // Copy message description to Y message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// Load 8x5 planar U and V -----------------------------------------------------
    asr  (2)    rMSGSRC.0<1>:d      rMSGSRC.0<2;2,1>:d  1:w                             // U/V block origin should be half of Y's
    mov  (1)    rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud                               // U/V block width and height (8x5)
    mov  (8)    mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud                                   // Header for the U read
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_U:ud
    mov  (8)    mMSGHDRV<1>:ud      rMSGSRC<8;8,1>:ud                                   // Header for the V read
    // Send from mMSGHDRV, the header populated by the mov directly above
    // (same contents as the U header; only the surface index differs).
    send (8)    udSRC_V(0)<1>       mMSGHDRV    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_V:ud

// Convert to word-aligned format ----------------------------------------------
// Both loops run from the last row down to row 0.
// NOTE(review): presumably so an in-place byte->word expansion never overwrites
// rows that are still unread - confirm against the region layout in PL3_Load.inc.
#if !defined(LOAD_UV_ONLY)
    $for (nY_NUM_OF_ROWS-1; >-1; -1) {
        mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
    }
#endif
    $for (nUV_NUM_OF_ROWS/2-1; >-1; -1) {
        mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*16)
        mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_V(0,%1*16)
    }

// End of IMC3_Load_8x5
i965_drv_video/shaders/post_processing/Common/IMC3_Load_9x5.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: IMC3_Load_9x5.asm
//
//----------------------------------------------------------------
//  This module loads 16x8 Y, 9x5 U and 9x5 V planar data blocks for CSC module
//  and stores it in byte-aligned format.
//  NOTE(review): the final section below is labelled "Convert to word-aligned
//  format"; the "byte-aligned" wording above may be stale - confirm.
//
//  Sizes, SRC_*/DEST_* aliases and row counts come from PL3_Load.inc, which is
//  selected by the IMC3_LOAD_9x5 define below.
//----------------------------------------------------------------

#define IMC3_LOAD_9x5
#include "PL3_Load.inc"

// Load 16x8 planar Y ----------------------------------------------------------
// The origin add is outside the LOAD_UV_ONLY guard because the U/V origin
// below is derived from rMSGSRC.0.
    add  (2)    rMSGSRC.0<1>:d      wORIX<2;2,1>:w      wSRC_H_ORI_OFFSET<2;2,1>:w      // Source Y Block origin
#if !defined(LOAD_UV_ONLY)
    mov  (1)    rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud                                // Block width and height (16x8)
    mov  (8)    mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud                                   // Copy message description to Y message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// Load 9x5 planar U and V -----------------------------------------------------
    asr  (2)    rMSGSRC.0<1>:d      rMSGSRC.0<2;2,1>:d  1:w                             // U/V block origin should be half of Y's
    mov  (1)    rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud                               // U/V block width and height (12x5)
    mov  (8)    mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud                                   // Header for the U read
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_U:ud
    mov  (8)    mMSGHDRV<1>:ud      rMSGSRC<8;8,1>:ud                                   // Header for the V read
    // Send from mMSGHDRV, the header populated by the mov directly above
    // (same contents as the U header; only the surface index differs).
    send (8)    udSRC_V(0)<1>       mMSGHDRV    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_V:ud

// Convert to word-aligned format ----------------------------------------------
// Both loops run from the last row down to row 0.
// NOTE(review): presumably so an in-place byte->word expansion never overwrites
// rows that are still unread - confirm against the region layout in PL3_Load.inc.
#if !defined(LOAD_UV_ONLY)
    $for (nY_NUM_OF_ROWS-1; >-1; -1) {
        mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
    }
#endif
    // U/V loop starts at nUV_NUM_OF_ROWS-2 (not /2-1 as in the 8x4/8x5 loaders).
    $for (nUV_NUM_OF_ROWS-2; >-1; -1) {
        mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*16)
        mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_V(0,%1*16)
    }

// End of IMC3_Load_9x5
i965_drv_video/shaders/post_processing/Common/Init_All_Regs.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Init_All_Regs: when GT is defined, writes zero to a run of GRF registers
// (loop index 7 up to, but not including, 80) before the kernel proper runs.
// When GT is not defined this file contributes no code.
#ifdef GT
    // to remove error messages of un-initialized GRF
    // Dword view based at r0.0; udGRF_space(%1) is indexed by the loop counter.
    .declare udGRF_space Base=r0.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud

    $for (7; <80; 1) {
        mov (8)     udGRF_space(%1)<1>      0:ud        // clear 8 dwords per iteration
    }

#else
#endif
\ No newline at end of file
i965_drv_video/shaders/post_processing/Common/Multiple_Loop.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
/////////////////////////////////////////////////////////////////////////////////
// Multiple_Loop.asm
//
// Loop tail for processing several blocks per thread: decrements the block
// counter, advances the block origin (and the scaling/blending source origins
// when INC_SCALING / INC_BLENDING are defined), then either exits or jumps
// back to VIDEO_PROCESSING_LOOP.
//
// NOTE: instruction order matters - f0.0 and f0.1 are set by the first
// add/cmp and consumed by the predicated jumps further down, and the cmp
// reads acc0.0 directly after the wORIX add.

// This label is for satisfying component kernel build.
// DL will remove this label and reference the real one in Multiple_Loop_Head.asm.
#if defined(COMPONENT)
VIDEO_PROCESSING_LOOP:
#endif

//===== Possible build flags for component kernels
// 1) INC_SCALING
// 2) INC_BLENDING
// 3) INC_BLENDING and INC_SCALING
// 4) (no flags)

// Defined unconditionally here, so the guarded section below is always assembled.
#define MxN_MULTIPLE_BLOCKS

//------------------------------------------------------------------------------
#if defined(MxN_MULTIPLE_BLOCKS)
// Do Multiple Block Processing ------------------------------------------------

    // The 1st block has been processed before entering the loop

    // Processed all blocks?
    add.z.f0.0  (1)     wNUM_BLKS:w         wNUM_BLKS:w         -1:w            // f0.0 set when the counter reaches zero

    // Reached multi-block width?
    add         (1)     wORIX:w             wORIX:w             16:w            // step X origin by one block (16)
    cmp.l.f0.1  (1)     null:w              acc0.0:w            wFRAME_ENDX:w   // acc0.0 has wORIX; f0.1 set while still left of the row end

#if defined(INC_SCALING)
    // Update SRC_VID_H_ORI for scaling
    mul         (1)     REG(r,nTEMP0):f     fVIDEO_STEP_X:f     16.0:f          // X step for 16 pixels
    add         (1)     fSRC_VID_H_ORI:f    REG(r,nTEMP0):f     fSRC_VID_H_ORI:f
#endif

#if defined(INC_BLENDING)
    // Update SRC_ALPHA_H_ORI for blending
    mul         (1)     REG(r,nTEMP0):f     fALPHA_STEP_X:f     16.0:f          // X step for 16 pixels
    add         (1)     fSRC_ALPHA_H_ORI:f  REG(r,nTEMP0):f     fSRC_ALPHA_H_ORI:f
#endif

    (f0.0) jmpi (1)     END_VIDEO_PROCESSING                                    // All blocks are done - Exit loop

    (f0.1) jmpi (1)     VIDEO_PROCESSING_LOOP                                   // If not the end of row, goto the beginning of the loop

    // If end of row, restart Horizontal offset and calculate Vertical offsets next row.
    mov         (1)     wORIX:w             wCOPY_ORIX:w                        // back to the saved starting X
    add         (1)     wORIY:w             wORIY:w             8:w             // down one block row (8)

#if defined(INC_SCALING)
    // Update SRC_VID_H_ORI and SRC_VID_V_ORI for scaling
    mov         (1)     fSRC_VID_H_ORI:f    fFRAME_VID_ORIX:f                   // Restore normalised video X origin from the saved frame origin
    mul         (1)     REG(r,nTEMP0):f     fVIDEO_STEP_Y:f     8.0:f           // Y step for 8 rows
    add         (1)     fSRC_VID_V_ORI:f    REG(r,nTEMP0):f     fSRC_VID_V_ORI:f
#endif

#if defined(INC_BLENDING)
    // Update SRC_ALPHA_H_ORI and SRC_ALPHA_V_ORI for blending
    mov         (1)     fSRC_ALPHA_H_ORI:f  fFRAME_ALPHA_ORIX:f                 // Restore normalised alpha X origin from the saved frame origin
    mul         (1)     REG(r,nTEMP0):f     fALPHA_STEP_Y:f     8.0:f           // Y step for 8 rows
    add         (1)     fSRC_ALPHA_V_ORI:f  REG(r,nTEMP0):f     fSRC_ALPHA_V_ORI:f
#endif

    jmpi        (1)     VIDEO_PROCESSING_LOOP                                   // Continue Loop

END_VIDEO_PROCESSING:
    nop

#endif

END_THREAD  // End of Thread
\ No newline at end of file
i965_drv_video/shaders/post_processing/Common/Multiple_Loop_Head.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//////////////////////////////////////////////////////////////////////////////////
// Multiple_Loop_Head.asm
// This code sets up the loop control for multiple blocks per thread
//
// It computes the row-end X (block count * 16 + starting X), widens the block
// count to a word, saves the starting X and the source origins, and defines
// the VIDEO_PROCESSING_LOOP label that the loop tail jumps back to.
//
// NOTE: keep the first three instructions adjacent and in this order - they
// carry paired NoDDClr/NoDDChk dependency-control hints.

    mul (1)     wFRAME_ENDX:w       ubBLK_CNT_X:ub      16:uw       { NoDDClr }             // Build multi-block loop counters
    mov (1)     wNUM_BLKS:w         ubNUM_BLKS:ub                   { NoDDClr, NoDDChk }    // Copy num blocks to word variable
    mov (1)     wCOPY_ORIX:w        wORIX:w                         { NoDDChk }             // Copy multi-block origin in pixel
    mov (2)     fFRAME_VID_ORIX<1>:f    fSRC_VID_H_ORI<4;2,2>:f                             // Copy src video origin for scaling, and alpha origin for blending
    add (1)     wFRAME_ENDX:w       wFRAME_ENDX:w       wORIX:w                             // Continue building multi-block loop counters

VIDEO_PROCESSING_LOOP:      // Loop back entry point as the beginning of the loop for multiple blocks

// Beginning of the loop
i965_drv_video/shaders/post_processing/Common/NV11_Load_4x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: NV11_Load_4x8.asm
//----------------------------------------------------------------
// Reads one NV11 block (Y plane plus interleaved U/V plane) through the data
// port and widens the bytes into word-aligned Y, U and V destination regions.

#define NV11_LOAD_4x8
#include "PL2_Load.inc"

// ---- Y plane: 16x8 block ------------------------------------------------------
    // Block origin = current origin + source origin offset (X and Y together).
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w

#if !defined(LOAD_UV_ONLY)
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud            // Y block width and height (16x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// ---- U/V plane: 8x8 interleaved block -----------------------------------------
    // Only the X origin is halved for the U/V plane; Y is left as is.
    asr (1)     rMSGSRC.0<1>:d      rMSGSRC.0<0;1,0>:d 1:w
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (8x8)
    mov (8)     mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_UV:ud

// ---- Expand bytes to word-aligned format --------------------------------------
// Rows are walked from last to first.
#if !defined(LOAD_UV_ONLY)
$for (nY_NUM_OF_ROWS-1; >-1; -1) {
    mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
}
#endif

// De-interleave: even source bytes go to U, odd source bytes go to V.
$for (nUV_NUM_OF_ROWS/4-1; >-1; -1) {
    mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*32)<32;16,2>
    mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_U(0,%1*32+1)<32;16,2>
}

// End of NV11_Load_4x8
i965_drv_video/shaders/post_processing/Common/NV11_Load_5x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: NV11_Load_5x8.asm
//----------------------------------------------------------------
// Same flow as the 4x8 NV11 loader, but the U/V read is 12 bytes wide so that
// extra chroma columns are available to the following stage.

#define NV11_LOAD_5x8
#include "PL2_Load.inc"

// ---- Y plane: 16x8 block ------------------------------------------------------
    // Block origin = current origin + source origin offset (X and Y together).
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w

#if !defined(LOAD_UV_ONLY)
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud            // Y block width and height (16x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// ---- U/V plane: 12x8 interleaved block ----------------------------------------
    // Only the X origin is halved for the U/V plane; Y is left as is.
    asr (1)     rMSGSRC.0<1>:d      rMSGSRC.0<0;1,0>:d 1:w
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (12x8)
    mov (8)     mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_UV:ud

// ---- Expand bytes to word-aligned format --------------------------------------
// Rows are walked from last to first.
#if !defined(LOAD_UV_ONLY)
$for (nY_NUM_OF_ROWS-1; >-1; -1) {
    mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
}
#endif

// De-interleave: even source bytes go to U, odd source bytes go to V.
$for (nUV_NUM_OF_ROWS/2-1; >-1; -1) {
    mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*32)<16;8,2>
    mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_U(0,%1*32+1)<16;8,2>
}

// End of NV11_Load_5x8
i965_drv_video/shaders/post_processing/Common/NV12_Load_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: NV12_Load_8x4.asm
//----------------------------------------------------------------
// Reads one NV12 block (16x8 Y, 16x4 interleaved U/V bytes) through the data
// port and widens the bytes into word-aligned Y, U and V destination regions.

#define NV12_LOAD_8x4
#include "PL2_Load.inc"

// ---- Y plane: 16x8 block ------------------------------------------------------
    // Block origin = current origin + source origin offset (X and Y together).
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w

#if !defined(LOAD_UV_ONLY)
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud            // Y block width and height (16x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// ---- U/V plane: 8x4 U and V samples (16x4 bytes) ------------------------------
    // Only the Y origin is halved for the U/V plane; X is left as is.
    asr (1)     rMSGSRC.1<1>:d      rMSGSRC.1<0;1,0>:d 1:w
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (16x4)
    mov (8)     mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_UV:ud

// ---- Expand bytes to word-aligned format --------------------------------------
// Rows are walked from last to first.
#if !defined(LOAD_UV_ONLY)
$for (nY_NUM_OF_ROWS-1; >-1; -1) {
    mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
}
#endif

// De-interleave: even source bytes go to U, odd source bytes go to V.
$for (nUV_NUM_OF_ROWS/2-1; >-1; -1) {
    mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*32)<32;16,2>
    mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_U(0,%1*32+1)<32;16,2>
}

// End of NV12_Load_8x4
i965_drv_video/shaders/post_processing/Common/NV12_Load_8x5.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: NV12_Load_8x5.asm
//----------------------------------------------------------------
// NV12 loader variant that reads five U/V rows (16x5 bytes) instead of four.

#define NV12_LOAD_8x5
#include "PL2_Load.inc"

// ---- Y plane: 16x8 block ------------------------------------------------------
    // Block origin = current origin + source origin offset (X and Y together).
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w

#if !defined(LOAD_UV_ONLY)
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud            // Y block width and height (16x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// ---- U/V plane: 8x5 U and V samples (16x5 bytes) ------------------------------
    // Only the Y origin is halved for the U/V plane; X is left as is.
    asr (1)     rMSGSRC.1<1>:d      rMSGSRC.1<0;1,0>:d 1:w
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (16x5)
    mov (8)     mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_UV:ud

// ---- Expand bytes to word-aligned format --------------------------------------
// Rows are walked from last to first.
#if !defined(LOAD_UV_ONLY)
$for (nY_NUM_OF_ROWS-1; >-1; -1) {
    mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
}
#endif

// De-interleave: even source bytes go to U, odd source bytes go to V.
$for (nUV_NUM_OF_ROWS/2-1; >-1; -1) {
    mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*32)<16;8,2>
    mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_U(0,%1*32+1)<16;8,2>
}

// End of NV12_Load_8x5
i965_drv_video/shaders/post_processing/Common/NV12_Load_9x5.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: NV12_Load_9x5.asm
//----------------------------------------------------------------
// NV12 loader variant that reads a 20x5-byte U/V block, i.e. extra chroma
// columns and one extra chroma row compared with the 8x4 loader.

#define NV12_LOAD_9x5
#include "PL2_Load.inc"

// ---- Y plane: 16x8 block ------------------------------------------------------
    // Block origin = current origin + source origin offset (X and Y together).
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w

#if !defined(LOAD_UV_ONLY)
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud            // Y block width and height (16x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// ---- U/V plane: 9x5 U and V samples (20x5 bytes) ------------------------------
    // Only the Y origin is halved for the U/V plane; X is left as is.
    asr (1)     rMSGSRC.1<1>:d      rMSGSRC.1<0;1,0>:d 1:w
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (20x5)
    mov (8)     mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_UV:ud

// ---- Expand bytes to word-aligned format --------------------------------------
// Rows are walked from last to first.
#if !defined(LOAD_UV_ONLY)
$for (nY_NUM_OF_ROWS-1; >-1; -1) {
    mov (16)    uwDEST_Y(0,%1*16)<1>    ubSRC_Y(0,%1*16)
}
#endif

// De-interleave: even source bytes go to U, odd source bytes go to V.
// The loop starts at nUV_NUM_OF_ROWS-2 and steps down to 0.
$for (nUV_NUM_OF_ROWS-2; >-1; -1) {
    mov (16)    uwDEST_U(0,%1*16)<1>    ubSRC_U(0,%1*32)<16;8,2>
    mov (16)    uwDEST_V(0,%1*16)<1>    ubSRC_U(0,%1*32+1)<16;8,2>
}

// End of NV12_Load_9x5
i965_drv_video/shaders/post_processing/Common/P208_Load_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: P208_Load_8x8.asm
//----------------------------------------------------------------
// Reads one P208 block (16x8 Y and 16x8 interleaved U/V bytes) through the data
// port and widens the bytes into word-aligned Y, U and V destination regions.

#define P208_LOAD_8x8
#include "PL2_Load.inc"

    // Block origin = current origin + source origin offset (X and Y together).
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w
    // One size serves both reads: the Y block is 16x8 and the U/V block size is the same.
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud

// ---- Y plane: 16x8 block ------------------------------------------------------
#if !defined(LOAD_UV_ONLY)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// ---- U/V plane: 16x8 interleaved block ----------------------------------------
    mov (8)     mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_UV:ud

// ---- Expand bytes to word-aligned format --------------------------------------
// Rows are walked from first to last.
#if !defined(LOAD_UV_ONLY)
$for(0; <nY_NUM_OF_ROWS; 1) {
    mov (16)    uwDEST_Y(0,%1*16)       ubSRC_Y(0,%1*16)
}
#endif

// De-interleave: even source bytes go to U, odd source bytes go to V.
$for(0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (16)    uwDEST_U(0,%1*16)       ubSRC_U(0,%1*32)<32;16,2>
    mov (16)    uwDEST_V(0,%1*16)       ubSRC_U(0,%1*32+1)<32;16,2>
}

// End of P208_Load_8x8.asm
i965_drv_video/shaders/post_processing/Common/P208_Load_9x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: P208_Load_9x8.asm
//----------------------------------------------------------------
// P208 loader variant that reads a 20x8-byte U/V block, i.e. extra chroma
// columns compared with the 8x8 loader.

#define P208_LOAD_9x8
#include "PL2_Load.inc"

    // Block origin = current origin + source origin offset (X and Y together).
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w

// ---- Y plane: 16x8 block ------------------------------------------------------
#if !defined(LOAD_UV_ONLY)
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_Y:ud            // Y block width and height (16x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_Y(0)<1>       mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_CURRENT_SRC_Y:ud
#endif

// ---- U/V plane: 20x8 interleaved block ----------------------------------------
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_UV:ud           // U/V block width and height (20x8)
    mov (8)     mMSGHDRU<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDRU    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_CURRENT_SRC_UV:ud

// ---- Expand bytes to word-aligned format --------------------------------------
// Rows are walked from first to last.
#if !defined(LOAD_UV_ONLY)
$for(0; <nY_NUM_OF_ROWS; 1) {
    mov (16)    uwDEST_Y(0,%1*16)       ubSRC_Y(0,%1*16)
}
#endif

// De-interleave: even source bytes go to U, odd source bytes go to V.
$for(0; <nUV_NUM_OF_ROWS; 1) {
    mov (16)    uwDEST_U(0,%1*16)       ubSRC_U(0,%1*32)<32;16,2>
    mov (16)    uwDEST_V(0,%1*16)       ubSRC_U(0,%1*32+1)<32;16,2>
}

// End of P208_Load_9x8.asm
i965_drv_video/shaders/post_processing/Common/PA_Load.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: PA_Load.inc
//
// Shared definitions for the packed-YUV422 (PA) load modules. The including
// module selects a variant by defining PA_LOAD_8x8 or PA_LOAD_9x8 first.
//
// YUV422 data are first loaded to bottom I/O REGION_2, then unpacked to planar data
// and stored in top I/O REGION_1
// Drop any row counts left over from a previously included module.
#undef nY_NUM_OF_ROWS
#undef nUV_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8 // Number of Y rows per block
#define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block
#if defined(PA_LOAD_8x8)
#define nDPR_BLOCK_SIZE_YUV nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8 // Packed YUV block size 32x8
#define nDPR_MSG_SIZE_YUV nRESLEN_8 // Response length for the packed YUV block data (8)
#endif
#if defined(PA_LOAD_9x8)
#define nDPR_BLOCK_SIZE_YUV_MAIN nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8 // Main YUV block size 32x8
#define nDPR_MSG_SIZE_YUV_MAIN nRESLEN_8 // Response length for the main YUV block data (8)
#define nDPR_BLOCK_SIZE_YUV_ADDITION nBLOCK_WIDTH_4+nBLOCK_HEIGHT_8 // Additional YUV block size 4x8
#define nDPR_MSG_SIZE_YUV_ADDITION nRESLEN_1 // Response length for the additional YUV block data (1)
#endif
// Source (packed) data lands in the bottom I/O region ...
#define udSRC_YUV udBOT_Y_IO
#define nSRC_YUV_REG nBOT_Y
// ... and the unpacked planar data is written to the top region.
#define uwDEST_Y uwTOP_Y
#define uwDEST_U uwTOP_U
#define uwDEST_V uwTOP_V
#define nSRC_REGION nREGION_1 // REGION_1 will be the source region for first kernel
// End of PA_Load.inc
i965_drv_video/shaders/post_processing/Common/PA_Load_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PA_Load_8x8.asm
//----------------------------------------------------------------
// Reads one packed YUV422 block and unpacks it into word-aligned planar
// Y, U and V destination regions.

#define PA_LOAD_8x8
#include "PA_Load.inc"

// ---- Read the packed block ----------------------------------------------------
// A 16x8 pixel block of packed data is fetched as a 32x8 byte block.
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w   // Source block origin
    // Double the horizontal origin. The source operand is acc0, so this must stay
    // directly after the add above (presumably acc0 still holds its result).
    shl (1)     rMSGSRC.0<1>:d      acc0:w      1:w
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_YUV:ud          // Block width and height (32x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud               // Build the read message header
    send (8)    udSRC_YUV(0)<1>     mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_CURRENT_SRC_YUV:ud

// ---- Unpack to "planar" YUV422 in word-aligned bytes --------------------------
    // Seed the Y/U/V indirect offsets: component offsets + start of the packed source.
    add (4)     pCF_Y_OFFSET<1>:uw  ubSRC_CF_OFFSET<4;4,1>:ub nSRC_YUV_REG*nGRFWIB:w

$for(0; <nY_NUM_OF_ROWS; 1) {
    // One packed row per iteration: 16 Y samples (stride 2), 8 U and 8 V samples (stride 4).
    mov (16)    uwDEST_Y(0,%1*16)<1>    r[pCF_Y_OFFSET, %1*nGRFWIB]REGION(16,2)
    mov (8)     uwDEST_U(0,%1*8)<1>     r[pCF_U_OFFSET, %1*nGRFWIB]REGION(8,4)
    mov (8)     uwDEST_V(0,%1*8)<1>     r[pCF_V_OFFSET, %1*nGRFWIB]REGION(8,4)
}

// End of PA_Load_8x8
i965_drv_video/shaders/post_processing/Common/PA_Load_9x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PA_Load_9x8.asm
//----------------------------------------------------------------
// Loads 16x8 Y, 9x8 U and 9x8 V planar data blocks for the CSC module and
// stores them in word-aligned format.
//----------------------------------------------------------------

#define PA_LOAD_9x8
#include "PA_Load.inc"

// ---- Read the packed block ----------------------------------------------------
// An 18x8 pixel block of packed data is fetched as 36x8 bytes, split into a
// 32x8 main read and a 4x8 additional read.
    add (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w   // Source block origin
    // Double the horizontal origin. The source operand is acc0, so this must stay
    // directly after the add above (presumably acc0 still holds its result).
    shl (1)     rMSGSRC.0<1>:d      acc0:w      1:w

    // Main read: 32x8 bytes.
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_YUV_MAIN:ud     // Block width and height (32x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_YUV(0)<1>     mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV_MAIN+nBI_CURRENT_SRC_YUV:ud

    // Additional read: 4x8 bytes, 32 bytes to the right of the main block.
    add (1)     rMSGSRC.0<1>:d      rMSGSRC.0:d     32:w            // the last 4 pixels are read again for optimization
    mov (1)     rMSGSRC.2<1>:ud     nDPR_BLOCK_SIZE_YUV_ADDITION:ud // Block width and height (4x8)
    mov (8)     mMSGHDRY<1>:ud      rMSGSRC<8;8,1>:ud
    send (8)    udSRC_YUV(8)<1>     mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV_ADDITION+nBI_CURRENT_SRC_YUV:ud

// ---- Unpack to "planar" YUV422 in word-aligned bytes --------------------------
    // Seed the Y/U/V indirect offsets: component offsets + start of the packed source.
    add (4)     pCF_Y_OFFSET<1>:uw  ubSRC_CF_OFFSET<4;4,1>:ub nSRC_YUV_REG*nGRFWIB:w

$for(0; <nY_NUM_OF_ROWS; 1) {
    // Main block: 16 Y samples (stride 2), 8 U and 8 V samples (stride 4) per row.
    // U and V rows are spaced 16 words apart, leaving room for the ninth sample.
    mov (16)    uwDEST_Y(0,%1*16)<1>    r[pCF_Y_OFFSET, %1*nGRFWIB]REGION(16,2)
    mov (8)     uwDEST_U(0,%1*16)<1>    r[pCF_U_OFFSET, %1*nGRFWIB]REGION(8,4)
    mov (8)     uwDEST_V(0,%1*16)<1>    r[pCF_V_OFFSET, %1*nGRFWIB]REGION(8,4)
}

$for(0; <nUV_NUM_OF_ROWS; 1) {
    // Ninth U and V sample of each row, read 256 bytes past the indirect base with
    // a 4-byte row step (presumably the additional 4x8 block - confirm).
    mov (1)     uwDEST_U(0,%1*16+8)<1>  r[pCF_U_OFFSET, %1*4+256]REGION(1,0)
    mov (1)     uwDEST_V(0,%1*16+8)<1>  r[pCF_V_OFFSET, %1*4+256]REGION(1,0)
}

// UV expansion done in PL9x8_PL16x8.asm module

// End of PA_Load_9x8
i965_drv_video/shaders/post_processing/Common/PL16x8_PL8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL16x8_PL8x4.asm
//----------------------------------------------------------------
// Chroma down-sampling: keeps every second row and every second sample of the
// U and V planes, in place.

#include "common.inc"

// Pick the working region from nSRC_REGION unless the includer already chose one.
#ifndef DEST_U      // DEST_U, DEST_V not defined
    #if (nSRC_REGION==nREGION_1)
        #define DEST_Y  uwTOP_Y
        #define DEST_U  uwTOP_U
        #define DEST_V  uwTOP_V
    #elif (nSRC_REGION==nREGION_2)
        #define DEST_Y  uwBOT_Y
        #define DEST_U  uwBOT_U
        #define DEST_V  uwBOT_V
    #endif
#endif

// %1 steps through source rows 0,2,4,6; %2 is the packed output row 0..3.
// Each output row holds 8 samples taken at stride 2 from the source row.
$for(0, 0; <8; 2, 1) {
    mov (8)     DEST_U(0,%2*8)<1>       DEST_U(%1)<16;8,2>
    mov (8)     DEST_V(0,%2*8)<1>       DEST_V(%1)<16;8,2>
}

// The chroma planes now hold 4 rows; publish the new row count.
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS 4
i965_drv_video/shaders/post_processing/Common/PL16x8_PL8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL16x8_PL8x8.asm
//----------------------------------------------------------------
// Horizontal chroma down-sampling (444 from sampler to 422): keeps every second
// sample of the U and V planes, in place.

#include "common.inc"

// Pick the working region from nSRC_REGION unless the includer already chose one.
#ifndef DEST_U      // DEST_U, DEST_V not defined
    #if (nSRC_REGION==nREGION_1)
        #define DEST_Y  uwTOP_Y
        #define DEST_U  uwTOP_U
        #define DEST_V  uwTOP_V
    #elif (nSRC_REGION==nREGION_2)
        #define DEST_Y  uwBOT_Y
        #define DEST_U  uwBOT_U
        #define DEST_V  uwBOT_V
    #endif
#endif

// %1 steps through source registers 0,2,4,6; %2 is the output register 0..3.
// NOTE(review): these mov instructions carry no explicit execution size, unlike
// the (8) used in PL16x8_PL8x4.asm - confirm the assembler default is intended.
$for(0, 0; <8; 2, 1) {
    mov     DEST_U(%2)<1>       DEST_U(%1)<16;8,2>
    mov     DEST_V(%2)<1>       DEST_V(%1)<16;8,2>
}
i965_drv_video/shaders/post_processing/Common/PL2_Load.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: PL2_Load.inc
//
// Shared definitions for the two-plane (Y + interleaved U/V) load modules.
// The including module selects a variant by defining exactly one of the
// NV11_LOAD_*, NV12_LOAD_* or P208_LOAD_* symbols before the #include.
// Drop any row counts left over from a previously included module.
#undef nY_NUM_OF_ROWS
#undef nUV_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8 // Number of Y rows per block
#define nDPR_BLOCK_SIZE_Y nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // Y block size 16x8
#define nDPR_MSG_SIZE_Y nRESLEN_4 // # of MRF's to hold Y block data (4)
#if defined(NV11_LOAD_4x8)
#define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_8+nBLOCK_HEIGHT_8 // U/V block size 8x8
#define nDPR_MSG_SIZE_UV nRESLEN_2 // # of MRF's to hold U/V block data (2)
#endif
#if defined(NV11_LOAD_5x8)
#define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_12+nBLOCK_HEIGHT_8 // U/V block size 12x8
#define nDPR_MSG_SIZE_UV nRESLEN_4 // # of MRF's to hold U/V block data (4)
#endif
#if defined(NV12_LOAD_8x4)
#define nUV_NUM_OF_ROWS 4 // Number of U/V rows per block
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4 // U/V block size 16x4
#define nDPR_MSG_SIZE_UV nRESLEN_2 // # of MRF's to hold U/V block data (2)
#endif
#if defined(NV12_LOAD_8x5)
#define nUV_NUM_OF_ROWS 6 // Number of U/V rows per block (Rounded Up to Nearest Even Number)
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_16+nBLOCK_HEIGHT_5 // U/V block size 16x5
#define nDPR_MSG_SIZE_UV nRESLEN_3 // # of MRF's to hold U/V block data (3)
#endif
#if defined(NV12_LOAD_9x5)
#define nUV_NUM_OF_ROWS 6 // Number of U/V rows per block (Rounded Up to Nearest Even Number)
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_20+nBLOCK_HEIGHT_5 // U/V block size 20x5
#define nDPR_MSG_SIZE_UV nRESLEN_5 // # of MRF's to hold U/V block data (5)
#endif
#if defined(P208_LOAD_8x8)
#define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // U/V block size 16x8
#define nDPR_MSG_SIZE_UV nRESLEN_4 // # of MRF's to hold U/V block data (4)
#endif
#if defined(P208_LOAD_9x8)
#define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_20+nBLOCK_HEIGHT_8 // U/V block size 20x8
#define nDPR_MSG_SIZE_UV nRESLEN_8 // # of MRF's to hold U/V block data (8)
#endif
// Source/destination region definitions
#if !defined(udSRC_Y)
#define udSRC_Y udBOT_Y_IO // Default Y source region is bottom Y region
#endif
#if !defined(udSRC_U)
#define udSRC_U udBOT_U_IO // Default U source region is bottom U region
#endif
#define ubSRC_Y ubBOT_Y // Byte-typed views of the loaded source data
#define nSRC_Y_REG nBOT_Y
#define ubSRC_U ubBOT_U
#define nSRC_U_REG nBOT_U
#define uwDEST_Y uwTOP_Y // However they can be transferred to word-aligned byte if desired
#define uwDEST_U uwTOP_U
#define uwDEST_V uwTOP_V
#define nSRC_REGION nREGION_1 // REGION_1 will be the source region for first kernel
// End of PL2_Load.inc
i965_drv_video/shaders/post_processing/Common/PL3_Load.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: PL3_Load.inc
//
// Shared definitions for the three-plane (separate Y, U and V) load modules.
// The including module selects a variant by defining one of the IMC3_LOAD_*
// symbols before the #include.
// Drop any row counts left over from a previously included module.
#undef nY_NUM_OF_ROWS
#undef nUV_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8 // Number of Y rows per block
#define nDPR_BLOCK_SIZE_Y nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // Y block size 16x8
#define nDPR_MSG_SIZE_Y nRESLEN_4 // # of MRF's to hold Y block data (4)
#if defined(IMC3_LOAD_8x4)
#define nUV_NUM_OF_ROWS 4 // Number of U/V rows per block
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_8+nBLOCK_HEIGHT_4 // U/V block size 8x4
#define nDPR_MSG_SIZE_UV nRESLEN_1 // # of MRF's to hold U/V block data (1)
#endif
#if defined(IMC3_LOAD_8x5)
#define nUV_NUM_OF_ROWS 6 // Number of U/V rows per block (Rounded Up to Nearest Even Number)
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_8+nBLOCK_HEIGHT_5 // U/V block size 8x5
#define nDPR_MSG_SIZE_UV nRESLEN_2 // # of MRF's to hold U/V block data (2)
#endif
#if defined(IMC3_LOAD_9x5)
#define nUV_NUM_OF_ROWS 6 // Number of U/V rows per block (Rounded Up to Nearest Even Number)
#define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_12+nBLOCK_HEIGHT_5 // U/V block size 12x5
#define nDPR_MSG_SIZE_UV nRESLEN_3 // # of MRF's to hold U/V block data (3)
#endif
// Source/destination region definitions
#if !defined(udSRC_Y)
#define udSRC_Y udBOT_Y_IO // Default Y source region is bottom Y region
#endif
#if !defined(udSRC_U)
#define udSRC_U udBOT_U_IO // Default U source region is bottom U region
#endif
#if !defined(udSRC_V)
#define udSRC_V udBOT_V_IO // Default V source region is bottom V region
#endif
#define ubSRC_Y ubBOT_Y // Loading data are always in byte type
#define ubSRC_U ubBOT_U
#define ubSRC_V ubBOT_V
#define uwDEST_Y uwTOP_Y // However they can be transferred to word-aligned byte if desired
#define uwDEST_U uwTOP_U
#define uwDEST_V uwTOP_V
#define nSRC_REGION nREGION_1 // REGION_1 will be the source region for first kernel
// End of PL3_Load.inc
i965_drv_video/shaders/post_processing/Common/PL4x8_Save_NV11.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
#include "PL4x8_Save_NV11.inc"

// PL4x8_Save_NV11.asm
// Writes the processed planar block back to an NV11 surface: a 16x8 Y block,
// then an 8x8-byte block of interleaved U/V samples.

    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud               // Start the write header from the message source

#if !defined(SAVE_UV_ONLY)
// Save current planar frame Y block data (16x8) -------------------------------
    mov (2)     mMSGHDR.0<1>:d      wORIX<2;2,1>:w                  // Block origin
    mov (1)     mMSGHDR.2<1>:ud     nDPW_BLOCK_SIZE_Y:ud            // Block width and height (16x8)

///* Yoni - masking is not relevant for ILK?!?
//
//  //Use the mask to determine which pixels shouldn't be over-written
//  cmp.ge.f0.0 (1) NULLREG     BLOCK_MASK_D:ud     0x00FFFFFF:ud   // Check if all pixels in the block need to be modified
//  (f0.0)  jmpi    WritePlanarToDataPort
//
//  //If mask is not all 1's, then load the entire 16x8 block
//  //so that only those bytes may be modified that need to be (using the mask)
//  send (8)    SRC_YD(0)<1>    MSGHDR  MSGSRC<8;8,1>:ud    DWBRMSGDSC+0x00040000+BI_DEST_Y:ud  //16x8
//
//  asr (2)     MSGSRC.0<1>:ud  ORIX<2;2,1>:w   1:w     // U/V block origin should be half of Y's
//  mov (1)     MSGSRC.2<1>:ud  0x00030007:ud           // Block width and height (8x4)
//  send (8)    SRC_UD(0)<1>    MSGHDR  MSGSRC<8;8,1>:ud    DWBRMSGDSC+0x00010000+BI_DEST_U:ud
//  send (8)    SRC_VD(0)<1>    MSGHDR  MSGSRC<8;8,1>:ud    DWBRMSGDSC+0x00010000+BI_DEST_V:ud
//
//  //Restore the origin information
//  mov (2)     MSGSRC.0<1>:ud  ORIX<2;2,1>:w           // Block origin
//  mov (1)     MSGSRC.2<1>:ud  0x0007000F:ud           // Block width and height (16x8)
//
//  //expand U and V to be aligned on word boundary
//  mov (16)    SRC_UW(1)<1>    SRC_U(0,16)
//  mov (16)    SRC_UW(0)<1>    SRC_U(0,0)
//  mov (16)    SRC_VW(1)<1>    SRC_V(0,16)
//  mov (16)    SRC_VW(0)<1>    SRC_V(0,0)
//
//  //Merge the data
//  mov (1)     f0.1:uw         BLOCK_MASK_V:uw         //Load the mask on flag reg
//  (f0.1)  mov (8)     TEMP0<1>:uw     BLOCK_MASK_H:uw
//  (-f0.1) mov (8)     TEMP0<1>:uw     0:uw
//
//  //Destination is Word aligned
//  $for(0; <Y_ROW_SIZE; 2) {
//      mov (1)         f0.1:uw     TEMP(0,%1)<0;1,0>
//      (-f0.1) mov (16)    DEST_Y(0,%1*32)<2>  SRC_Y(0,%1*16)
//      (-f0.1) mov (16)    DEST_U(0,%1*8)<1>   SRC_U(0,%1*8)       //only works for Word aligned Byte data
//      (-f0.1) mov (16)    DEST_V(0,%1*8)<1>   SRC_V(0,%1*8)       //only works for Word aligned Byte data
//
//      mov (1)         f0.1:uw     TEMP(0,1+%1)<0;1,0>
//      (-f0.1) mov (16)    DEST_Y(0,1+%1*32)<2>    SRC_Y(0,1+%1*16)
//
//  }
//
//*/ Yoni - masking is not relevant for ILK?!?

WritePlanarToDataPort:
    // Pack two 16-pixel Y rows into each payload register (low byte of every word).
    $for(0,0; <nY_NUM_OF_ROWS; 2,1) {
        mov (16)    mubMSGPAYLOAD(%2,0)<1>      ub2DEST_Y(%1)REGION(16,2)
        mov (16)    mubMSGPAYLOAD(%2,16)<1>     ub2DEST_Y(%1+1)REGION(16,2)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_Y+nBI_DESTINATION_Y:ud
#endif

// Save U/V data block in planar format (4x8) ----------------------------------
    mov (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w                  // Block origin
    asr (1)     rMSGSRC.0<1>:d      rMSGSRC.0<0;1,0>:d 1:w          // U/V block origin should be half of Y's
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    // Set the U/V block size explicitly, mirroring the Y path above. Without this
    // the header inherits whatever block size an earlier module left in rMSGSRC.2,
    // and nDPW_BLOCK_SIZE_UV from the .inc would go unused.
    mov (1)     mMSGHDR.2<1>:ud     nDPW_BLOCK_SIZE_UV:ud           // U/V interleaved block width and height (8x8)

    // Interleave U (even payload bytes) and V (odd payload bytes).
    $for(0,0; <nY_NUM_OF_ROWS;4,1) {
        mov (16)    mubMSGPAYLOAD(%2,0)<2>      ub2DEST_U(%2)REGION(16,2)
        mov (16)    mubMSGPAYLOAD(%2,1)<2>      ub2DEST_V(%2)REGION(16,2)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_UV+nBI_DESTINATION_UV:ud

// End of PL4x8_Save_NV11
i965_drv_video/shaders/post_processing/Common/PL4x8_Save_NV11.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//Module name: PL4x8_Save_NV11.inc
//
// Setup for storing planar data
//
#include "undefall.inc" //Undefine the SRC and DEST symbols
#define nDPW_BLOCK_SIZE_Y nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // Y block size 16x8
#define nDPW_MSG_SIZE_Y nMSGLEN_4 // # of MRF's to hold Y block data (4)
#define nDPW_BLOCK_SIZE_UV nBLOCK_WIDTH_8+nBLOCK_HEIGHT_8 // U/V interleaved block width and height (8x8)
#define nDPW_MSG_SIZE_UV nMSGLEN_2 // # of MRF's to hold U/V block data (2)
// The SRC_*/DEST_* aliases depend on which region the preceding stage used as its source.
#if (nSRC_REGION==nREGION_1)
#define udSRC_Y udBOT_Y_IO
#define udSRC_U udBOT_U_IO
#define udSRC_V udBOT_V_IO
#define ubSRC_Y ubBOT_Y
#define ubSRC_U ubBOT_U
#define ubSRC_V ubBOT_V
#define uwSRC_U uwBOT_U //For masking operation
#define uwSRC_V uwBOT_V
#define ub2DEST_Y ub2TOP_Y
#define ub2DEST_U ub2TOP_U
#define ub2DEST_V ub2TOP_V
#elif (nSRC_REGION==nREGION_2)
#define udSRC_Y udTOP_Y_IO
#define udSRC_U udTOP_U_IO
#define udSRC_V udTOP_V_IO
#define ubSRC_Y ubTOP_Y
#define ubSRC_U ubTOP_U
#define ubSRC_V ubTOP_V
#define uwSRC_U uwTOP_U //For masking operation
#define uwSRC_V uwTOP_V
#define ub2DEST_Y ub2BOT_Y
#define ub2DEST_U ub2BOT_U
#define ub2DEST_V ub2BOT_V
#endif
///* Yoni - masking is not relevant for ILK?!?
//#define TEMP0 REG(r,54)
//.declare TEMP Base=TEMP0 ElementSize=2 SrcRegion=<8;8,1> Type=uw
///* Yoni - masking is not relevant for ILK?!?
i965_drv_video/shaders/post_processing/Common/PL5x8_PL16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL5x8_PL16x8.asm
//
// Horizontal chroma upconversion performed in place on the uwDEST_U / uwDEST_V
// word regions.
//
// Region pairing used by every avg.sat below:
//   src0 <1;2,0> reads each source word twice      : s0, s0, s1, s1, ...
//   src1 <1;2,1> reads overlapping neighbour pairs : s0, s1, s1, s2, ...
// so the averaged result interleaves the original samples with the average of
// each adjacent pair: s0, avg(s0,s1), s1, avg(s1,s2), ...
//
// Both loops walk the row index (%1) from nUV_NUM_OF_ROWS/2-1 down to 0.
// NOTE(review): the descending order and the "upper half first" ordering inside
// each iteration presumably keep the in-place expansion from overwriting rows
// that have not been read yet -- do not reorder without confirming.

#include "Expansion.inc"

//------------------------------ Horizontal Upconversion -----------------------------
// Pass 1: expand the 16-word row at offset %1*16 into the 32-word row at offset %1*32.
$for(nUV_NUM_OF_ROWS/2-1; >-1; -1) {
    avg.sat (16) uwDEST_U(0, %1*32+16) uwDEST_U(0, %1*16+7)<1;2,0> uwDEST_U(0, %1*16+7)<1;2,1>
    avg.sat (16) uwDEST_V(0, %1*32+16) uwDEST_V(0, %1*16+7)<1;2,0> uwDEST_V(0, %1*16+7)<1;2,1>

    avg.sat (16) uwDEST_U(0, %1*32) uwDEST_U(0, %1*16)<1;2,0> uwDEST_U(0, %1*16)<1;2,1>
    avg.sat (16) uwDEST_V(0, %1*32) uwDEST_V(0, %1*16)<1;2,0> uwDEST_V(0, %1*16)<1;2,1>
}

// Pass 2: second in-place averaging pass over each 32-word row produced by pass 1.
// NOTE(review): the +18 source offset for the upper half is not explained in this
// file -- confirm against the 5x8 sample layout before changing it.
$for(nUV_NUM_OF_ROWS/2-1; >-1; -1) {
    avg.sat (16) uwDEST_U(0, %1*32+16) uwDEST_U(0, %1*32+18)<1;2,0> uwDEST_U(0, %1*32+18)<1;2,1>
    avg.sat (16) uwDEST_V(0, %1*32+16) uwDEST_V(0, %1*32+18)<1;2,0> uwDEST_V(0, %1*32+18)<1;2,1>

    avg.sat (16) uwDEST_U(0, %1*32) uwDEST_U(0, %1*32)<1;2,0> uwDEST_U(0, %1*32)<1;2,1>
    avg.sat (16) uwDEST_V(0, %1*32) uwDEST_V(0, %1*32)<1;2,0> uwDEST_V(0, %1*32)<1;2,1>
}

// End of PL5x8_PL16x8
i965_drv_video/shaders/post_processing/Common/PL5x8_PL8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL5x8_PL8x8.asm
//
// Horizontal chroma upconversion, done in place on every 8-word row of
// uwDEST_U / uwDEST_V (row %1 lives at word offset %1*8).
//
// src0 <1;2,0> yields s0, s0, s1, s1, ... and src1 <1;2,1> yields
// s0, s1, s1, s2, ..., so each output row is s0, avg(s0,s1), s1, avg(s1,s2), ...

#include "Expansion.inc"

//------------------------------ Horizontal Upconversion -----------------------------
$for(0; <nUV_NUM_OF_ROWS; 1) {
    avg.sat (8) uwDEST_U(0, %1*8) uwDEST_U(0, %1*8)<1;2,0> uwDEST_U(0, %1*8)<1;2,1>
    avg.sat (8) uwDEST_V(0, %1*8) uwDEST_V(0, %1*8)<1;2,0> uwDEST_V(0, %1*8)<1;2,1>
}

// End of PL5x8_PL8x8
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_IMC3.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL8x4_Save_IMC3.asm
//
// Save planar YUV420 frame data block of size 16x8
//
// Flow:
//   1. Test udBLOCK_MASK; if the low 24 bits are all set, every pixel is to be
//      written and control jumps straight to WritePlanarToDataPort.
//   2. Otherwise read back the Y (16x8), U and V (8x4) blocks through the
//      data port into the SRC_* regions and, row by row, copy the read-back
//      pixels over the DEST_* pixels whose mask bit is clear, so that those
//      pixels are written back unchanged.
//   3. Write the Y block (unless SAVE_UV_ONLY is defined) and then the U and V
//      blocks through the data port.

#include "PL8x4_Save_IMC3.inc"

    //Use the mask to determine which pixels shouldn't be over-written
    and (1)         acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud    0x00FFFFFF:ud
    cmp.ge.f0.0 (1) dNULLREG        acc0.0<0;1,0>:ud          0x00FFFFFF:ud    //Check if all pixels in the block need to be modified
    (f0.0) jmpi     WritePlanarToDataPort

    //If mask is not all 1's, then load the entire 16x8 block
    //so that only those bytes may be modified that need to be (using the mask)

    // Load 16x8 planar Y ----------------------------------------------------------
    mov (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w              // Block origin
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_Y:ud        // Block width and height (16x8)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    send (8)    udSRC_Y(0)<1>       mMSGHDR    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_DESTINATION_Y:ud

    // Load 8x4 planar U and V -----------------------------------------------------
    asr (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w    1:w       // U/V block origin should be half of Y's
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_UV:ud       // Block width and height (8x4)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    send (8)    udSRC_U(0)<1>       mMSGHDR    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_DESTINATION_U:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    send (8)    udSRC_V(0)<1>       mMSGHDR    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_DESTINATION_V:ud

    //expand U and V to be aligned on word boundary - Y remains in bytes
    // (descending row order: the widened row is written at or above its byte source)
    $for(nUV_NUM_OF_ROWS/2-1; >-1; -1) {
        mov (16)    uwSRC_U(0, %1*16)<1>    ubSRC_U(0, %1*16)
        mov (16)    uwSRC_V(0, %1*16)<1>    ubSRC_V(0, %1*16)
    }

    //Merge the data
    // Build one mask word per row in rMASK_TEMP: the horizontal mask for rows
    // whose bit is set in the vertical mask, 0 for rows whose bit is clear.
    mov (1)         f0.0:uw             ubBLOCK_MASK_V:ub       //Load the mask on flag reg
    (f0.0)  mov (8) rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw
    (-f0.0) mov (8) rMASK_TEMP<1>:uw    0:uw

    // Destination is Word aligned
    // Each iteration handles Y rows %1 and %1+1 plus one U/V row. A Y row takes
    // 32 bytes in the word-aligned destination and 16 bytes in the byte-packed
    // read-back source.
    $for(0; <nY_NUM_OF_ROWS; 2) {
        mov (1)             f0.1:uw                 uwMASK_TEMP(0,%1)<0;1,0>
        (-f0.1) mov (16)    ub2DEST_Y(0, %1*32)<2>  ubSRC_Y(0, %1*16)
        (-f0.1) mov (16)    ub2DEST_U(0, %1*8)<1>   ubSRC_U(0, %1*8)    //only works for Word aligned Byte data
        (-f0.1) mov (16)    ub2DEST_V(0, %1*8)<1>   ubSRC_V(0, %1*8)    //only works for Word aligned Byte data

        mov (1)             f0.1:uw                 uwMASK_TEMP(0,1+%1)<0;1,0>
        // FIX: the row index must be incremented before scaling. The previous
        // "1+%1*32" / "1+%1*16" addressed byte 1 of row %1 instead of row %1+1,
        // so masked-out pixels of odd rows were never restored.
        (-f0.1) mov (16)    ub2DEST_Y(0, (1+%1)*32)<2>  ubSRC_Y(0, (1+%1)*16)
    }

WritePlanarToDataPort:
#if !defined(SAVE_UV_ONLY)
    // Save current planar frame Y block data (16x8) -------------------------------
    mov (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w              // Block origin
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_Y:ud        // Block width and height (16x8)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Pack two word-aligned Y rows (%1, %1+1) into each payload register (%2)
    $for(0,0; <nY_NUM_OF_ROWS; 2,1) {
        mov (16)    mubMSGPAYLOAD(%2,0)<1>      ub2DEST_Y(%1)REGION(16,2)
        mov (16)    mubMSGPAYLOAD(%2,16)<1>     ub2DEST_Y(%1+1)REGION(16,2)
    }
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_Y+nBI_DESTINATION_Y:ud
#endif

    // Save U/V data block in planar format (8x4) ----------------------------------
    asr (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w    1:w       // U/V block origin should be half of Y's
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_UV:ud       // Block width and height (8x4)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Save U picture data ---------------------------------------------------------
    mov (16)    mubMSGPAYLOAD(0,0)<1>       ub2DEST_U(0)REGION(16,2)    // U rows 0,1
    mov (16)    mubMSGPAYLOAD(0,16)<1>      ub2DEST_U(1)REGION(16,2)    // U rows 2,3
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_UV+nBI_DESTINATION_U:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Save V picture data ---------------------------------------------------------
    mov (16)    mubMSGPAYLOAD(0,0)<1>       ub2DEST_V(0)REGION(16,2)    // V rows 0,1
    mov (16)    mubMSGPAYLOAD(0,16)<1>      ub2DEST_V(1)REGION(16,2)    // V rows 2,3
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_UV+nBI_DESTINATION_V:ud

// End of PL8x4_Save_IMC3
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_IMC3.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: PL8x4_Save_IMC3.inc
//
// Setup for storing planar data
//
// Defines the data-port write sizes (nDPW_*) and read-back sizes (nDPR_*) used
// by PL8x4_Save_IMC3.asm, declares the per-row mask scratch register, and
// aliases the generic SRC_*/DEST_* symbols onto TOP_*/BOT_* per nSRC_REGION.
//
#include "undefall.inc" //Undefine the SRC and DEST symbols
// For saving
#define nDPW_BLOCK_SIZE_Y nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // Y block size 16x8
#define nDPW_MSG_SIZE_Y nMSGLEN_4 // # of MRF's to hold Y block data (4)
#define nDPW_BLOCK_SIZE_UV nBLOCK_WIDTH_8+nBLOCK_HEIGHT_4 // U/V block size 8x4
#define nDPW_MSG_SIZE_UV nMSGLEN_1 // # of MRF's to hold U/V block data (1)
// For masking
#undef nDPR_MSG_SIZE_Y
#define nDPR_MSG_SIZE_Y nRESLEN_4 // # of MRF's to hold Y block data (4)
#undef nDPR_MSG_SIZE_UV
#define nDPR_MSG_SIZE_UV nRESLEN_1 // # of MRF's to hold U/V block data (1)
// Scratch register holding one 16-bit mask word per row during the merge
#define rMASK_TEMP REG(r,nTEMP0)
.declare uwMASK_TEMP Base=rMASK_TEMP ElementSize=2 SrcRegion=<8;8,1> Type=uw //1 GRF
#if (nSRC_REGION==nREGION_1)
// For saving
#define ub2DEST_Y ub2TOP_Y
#define ub2DEST_U ub2TOP_U
#define ub2DEST_V ub2TOP_V
//For masking operation
#define udSRC_Y udBOT_Y_IO
#define udSRC_U udBOT_U_IO
#define udSRC_V udBOT_V_IO
#define ubSRC_Y ubBOT_Y
#define ubSRC_U ubBOT_U
#define ubSRC_V ubBOT_V
#define uwSRC_U uwBOT_U
#define uwSRC_V uwBOT_V
#elif (nSRC_REGION==nREGION_2)
// For saving
#define ub2DEST_Y ub2BOT_Y
#define ub2DEST_U ub2BOT_U
#define ub2DEST_V ub2BOT_V
//For masking operation
#define udSRC_Y udTOP_Y_IO
#define udSRC_U udTOP_U_IO
#define udSRC_V udTOP_V_IO
#define ubSRC_Y ubTOP_Y
#define ubSRC_U ubTOP_U
#define ubSRC_V ubTOP_V
#define uwSRC_U uwTOP_U
#define uwSRC_V uwTOP_V
#endif
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_NV12.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL8x4_Save_NV12.asm
//
// Save entire current planar frame data block of size 16x8
//---------------------------------------------------------------
//  Symbols needed to be defined before including this module
//
//      DWORD_ALIGNED_DEST: only if DEST_Y, DEST_U, DEST_V data are DWord aligned
//      ORIX:
//---------------------------------------------------------------
//
// Flow:
//   1. Prepare the message header (Y origin / block size unless SAVE_UV_ONLY).
//   2. If the low 24 bits of udBLOCK_MASK are all set, jump to
//      WritePlanarToDataPort; otherwise read back the Y block and the
//      interleaved U/V block and restore every pixel whose mask bit is clear.
//   3. Write the Y block (unless SAVE_UV_ONLY) and the interleaved U/V block.

#include "PL8x4_Save_NV12.inc"

    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

#if !defined(SAVE_UV_ONLY)
    // Save current planar frame Y block data (16x8) -------------------------------
    mov (2)     mMSGHDR.0<1>:d      wORIX<2;2,1>:w              // Block origin
    mov (1)     mMSGHDR.2<1>:ud     nDPW_BLOCK_SIZE_Y:ud        // Block width and height (16x8)
#endif

    //Use the mask to determine which pixels shouldn't be over-written
    and (1)         acc0<1>:ud      udBLOCK_MASK<0;1,0>:ud    0x00FFFFFF:ud
    cmp.ge.f0.0 (1) dNULLREG        acc0<0;1,0>:ud            0x00FFFFFF:ud    //Check if all pixels in the block need to be modified
    (f0.0) jmpi     WritePlanarToDataPort

    //If mask is not all 1's, then load the entire 16x8 block
    //so that only those bytes may be modified that need to be (using the mask)
    send (8)    udSRC_Y(0)<1>       mMSGHDR    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_Y+nBI_DESTINATION_Y:ud    //16x8

    asr (1)     rMSGSRC.1<1>:ud     wORIY<0;1,0>:w    1:w       { NoDDClr }     // U/V block origin should be half of Y's
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_UV:ud       { NoDDChk }     // Block width and height (16x4)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud           //move message descriptor to the message header
    send (8)    udSRC_U(0)<1>       mMSGHDR    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_UV+nBI_DESTINATION_UV:ud

    //Restore the origin information
    mov (2)     rMSGSRC.0<1>:ud     wORIX<2;2,1>:w              // Block origin
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_Y:ud        // Block width and height (16x8)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud           //move message descriptor to the message header

    //Merge the data
    // Build one mask word per row in rMASK_TEMP: the horizontal mask for rows
    // whose bit is set in the vertical mask, 0 for rows whose bit is clear.
    mov (1)         f0.1:uw             ubBLOCK_MASK_V:ub       //Load the mask on flag reg
    (f0.1)  mov (8) rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw
    (-f0.1) mov (8) rMASK_TEMP<1>:uw    0:uw

    // convert the mask from 16bits to 8bits by selecting every other bit
    // (the eight words written below are 1<<0, 1<<2, 1<<4, ... 1<<14)
    mov (1)     udMASK_TEMP1(0,0)<1>    0x00040001:ud
    mov (1)     udMASK_TEMP1(0,1)<1>    0x00400010:ud
    mov (1)     udMASK_TEMP1(0,2)<1>    0x04000100:ud
    mov (1)     udMASK_TEMP1(0,3)<1>    0x40001000:ud

    //merge the loaded block with the current block
    // %1 = Y row index (step 2), %2 = U/V row index (step 1)
    $for(0,0; <nY_NUM_OF_ROWS; 2,1) {
        mov (1)             f0.1:uw                 uwMASK_TEMP(0, %1)<0;1,0>
        (-f0.1) mov (16)    ubDEST_Y(0, %1*32)<2>   ubSRC_Y(0, %1*16)

        // U and V are both taken from the interleaved read-back in ub2SRC_U:
        // even bytes feed DEST_U, odd bytes (+1) feed DEST_V.
        and.nz.f0.1 (8)     wNULLREG                uwMASK_TEMP(0, %1)<0;1,0>    uwMASK_TEMP1(0,0)    //change the mask by selecting every other bit
        (-f0.1) mov (8)     ubDEST_U(0, %2*16)<2>   ub2SRC_U(0, %1*8)<16;8,2>
        (-f0.1) mov (8)     ubDEST_V(0, %2*16)<2>   ub2SRC_U(0, %1*8+1)<16;8,2>

        mov (1)             f0.1:uw                 uwMASK_TEMP(0,1+%1)<0;1,0>
        (-f0.1) mov (16)    ubDEST_Y(0, (1+%1)*32)<2>    ubSRC_Y(0, (1+%1)*16)
    }

WritePlanarToDataPort:
#if !defined(SAVE_UV_ONLY)
    // Pack two word-aligned Y rows (%1, %1+1) into each payload register (%2)
    $for(0,0; <nY_NUM_OF_ROWS; 2,1) {
        mov (16)    mubMSGPAYLOAD(%2,0)<1>      ub2DEST_Y(%1)REGION(16,2)
        mov (16)    mubMSGPAYLOAD(%2,16)<1>     ub2DEST_Y(%1+1)REGION(16,2)
    }
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_Y+nBI_DESTINATION_Y:ud
#endif

    //** Save 8x4 packed U and V -----------------------------------------------------
    // we could write directly wORIX to mMSGHDR and then execute asr on it, that way we could
    // avoid using rMSGSRC as a buffer and have one command less in code, but it is unknown whether
    // it is possible to do asr on mMSGHDR so we use rMSGSRC.
    mov (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w              // Block origin
    asr (1)     rMSGSRC.1<1>:d      rMSGSRC.1<0;1,0>:d    1:w   // U/V block origin should be half of Y's
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_UV:ud       // U/V block width and height (16x4)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Interleave U (even payload bytes) and V (odd payload bytes); %2 indexes
    // both the payload register and the U/V source register.
    $for(0,0; <nY_NUM_OF_ROWS;4,1) {
        mov (16)    mubMSGPAYLOAD(%2,0)<2>      ub2DEST_U(%2)REGION(16,2)
        mov (16)    mubMSGPAYLOAD(%2,1)<2>      ub2DEST_V(%2)REGION(16,2)
    }
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_UV+nBI_DESTINATION_UV:ud

// End of PL8x4_Save_NV12
i965_drv_video/shaders/post_processing/Common/PL8x4_Save_NV12.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//Module name: PL8x4_Save_NV12.inc
//
// Setup for storing planar data
//
// Defines the data-port write sizes (nDPW_*) and read-back sizes (nDPR_*) used
// by PL8x4_Save_NV12.asm, declares the mask scratch registers, and aliases the
// generic SRC_*/DEST_* symbols onto TOP_*/BOT_* per nSRC_REGION.
//
#include "undefall.inc" //Undefine the SRC and DEST symbols
#undef nDPW_BLOCK_SIZE_Y
#undef nDPW_MSG_SIZE_Y
#undef nDPW_BLOCK_SIZE_UV
#undef nDPW_MSG_SIZE_UV
#define nDPW_BLOCK_SIZE_Y nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // Y block size 16x8
#define nDPW_MSG_SIZE_Y nMSGLEN_4 // # of MRF's to hold Y block data (4)
#define nDPW_BLOCK_SIZE_UV nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4 // U/V interleaved block width and height (16x4)
#define nDPW_MSG_SIZE_UV nMSGLEN_2 // # of MRF's to hold U/V block data (2)
// For masking
#undef nDPR_MSG_SIZE_Y
#define nDPR_MSG_SIZE_Y nRESLEN_4 // # of MRF's to hold Y block data (4)
#undef nDPR_MSG_SIZE_UV
// Size term for the interleaved U/V read-back message
#define nDPR_MSG_SIZE_UV nRESLEN_2
// Scratch register holding one 16-bit mask word per row during the merge
#define rMASK_TEMP REG(r,nTEMP0)
.declare uwMASK_TEMP Base=rMASK_TEMP ElementSize=2 SrcRegion=<8;8,1> Type=uw //1 GRF
// Scratch register holding the "every other bit" selector words; the same
// register is declared twice so it can be written as dwords and read as words.
#define rMASK_TEMP1 REG(r,nTEMP1)
.declare udMASK_TEMP1 Base=rMASK_TEMP1 ElementSize=4 SrcRegion=<4;4,1> Type=ud //1 GRF
.declare uwMASK_TEMP1 Base=rMASK_TEMP1 ElementSize=2 SrcRegion=<8;8,1> Type=uw //1 GRF
#if (nSRC_REGION==nREGION_1)
#define udSRC_Y udBOT_Y_IO
#define udSRC_U udBOT_U_IO
#define udSRC_V udBOT_V_IO
#define ubSRC_Y ubBOT_Y
#define ubSRC_U ubBOT_U
#define ubSRC_V ubBOT_V
#define uwSRC_U uwBOT_U //For masking operation
#define uwSRC_V uwBOT_V
#define ub2DEST_Y ub2TOP_Y
#define ub2DEST_U ub2TOP_U
#define ub2DEST_V ub2TOP_V
#define ubDEST_Y ubTOP_Y
#define ubDEST_U ubTOP_U
#define ubDEST_V ubTOP_V
#define ub2SRC_U ub2BOT_U
#elif (nSRC_REGION==nREGION_2)
#define udSRC_Y udTOP_Y_IO
#define udSRC_U udTOP_U_IO
#define udSRC_V udTOP_V_IO
#define ubSRC_Y ubTOP_Y
#define ubSRC_U ubTOP_U
#define ubSRC_V ubTOP_V
#define uwSRC_U uwTOP_U //For masking operation
#define uwSRC_V uwTOP_V
#define ub2DEST_Y ub2BOT_Y
#define ub2DEST_U ub2BOT_U
#define ub2DEST_V ub2BOT_V
#define ubDEST_Y ubBOT_Y
#define ubDEST_U ubBOT_U
#define ubDEST_V ubBOT_V
#define ub2SRC_U ub2TOP_U
#endif
///* Yoni - masking is not relevant for ILK?!?
//#define TEMP0 REG(r,54)
//.declare TEMP Base=TEMP0 ElementSize=2 SrcRegion=<8;8,1> Type=uw
///* Yoni - masking is not relevant for ILK?!?
i965_drv_video/shaders/post_processing/Common/PL8x5_PL8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL8x5_PL8x8.asm
//
// Vertical chroma upconversion, done in place on uwDEST_U / uwDEST_V.
// Input rows are 8 words wide at word offset r*8; output rows are written at
// word offset r*16 (copied input row r) and r*16+8 (average of input rows r
// and r+1).
//
// The last interpolated row (from input rows 3 and 4) is computed first, then
// the loop walks %1 downwards so each input row is read before the wider
// output layout overwrites it.

#include "Expansion.inc"

//------------------------------- Vertical Upconversion ------------------------------
    avg.sat (8) uwDEST_U(0, 3*16+8)<1>      uwDEST_U(0, 3*8)    uwDEST_U(0, (1+3)*8)    // Optimization
    avg.sat (8) uwDEST_V(0, 3*16+8)<1>      uwDEST_V(0, 3*8)    uwDEST_V(0, (1+3)*8)    // Optimization

$for(nUV_NUM_OF_ROWS/2-2; >-1; -1) {
    // move input row %1+1 to its output slot, then interpolate between rows %1 and %1+1
    mov (8)     uwDEST_U(0, (1+%1)*16)<1>   uwDEST_U(0, (1+%1)*8)
    avg.sat (8) uwDEST_U(0, %1*16+8)<1>     uwDEST_U(0, %1*8)   uwDEST_U(0, (1+%1)*8)

    mov (8)     uwDEST_V(0, (1+%1)*16)<1>   uwDEST_V(0, (1+%1)*8)
    avg.sat (8) uwDEST_V(0, %1*16+8)<1>     uwDEST_V(0, %1*8)   uwDEST_V(0, (1+%1)*8)
}

// End of PL8x5_PL8x8
i965_drv_video/shaders/post_processing/Common/PL8x8_PL8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL8x8_PL8x4.asm
//
// Convert PL 8x8 to PL8x4 in GRF
//---------------------------------------------------------------
//  Symbols needed to be defined before including this module
//
//      DWORD_ALIGNED_DEST: only if DEST_Y, DEST_U, DEST_V data are DWord aligned
//      ORIX:
//---------------------------------------------------------------
//
// Vertical 2:1 chroma decimation done in place: the moves below compact the
// U and V data by copying 8-element groups (every other byte) from registers
// 1 and 2.. of each plane into byte offsets 16 and 32.. of the same plane.

#include "PL8x8_PL8x4.inc"

// Convert PL8x8 to PL8x4 ---------------------------------------------------------
    mov (8)     ubDEST_U(0,16)<2>   ubDEST_U(1)<16;8,2>     //selecting U every other row
    mov (16)    ubDEST_U(0,32)<2>   ubDEST_U(2)<32;8,2>     //selecting U every other row
    mov (8)     ubDEST_V(0,16)<2>   ubDEST_V(1)<16;8,2>     //selecting V every other row
    mov (16)    ubDEST_V(0,32)<2>   ubDEST_V(2)<32;8,2>     //selecting V every other row

// End of PL8x8_PL8x4.asm -------------------------------------------------------
\ No newline at end of file
i965_drv_video/shaders/post_processing/Common/PL8x8_PL8x4.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: PL8x8_PL8x4.inc
//
// Setup module for convert PL8x8 to PL8x4
//
//
// Source/destination region definitions
//
// The conversion works in place, so only DEST_* aliases are defined here;
// nSRC_REGION selects whether they refer to the TOP_* or the BOT_* names.
//
#include "undefall.inc" //Undefine the SRC and DEST symbols
#if (nSRC_REGION==nREGION_1)
//REGION_1 selected
#define ubDEST_Y ubTOP_Y
#define ubDEST_U ubTOP_U
#define ubDEST_V ubTOP_V
#elif (nSRC_REGION==nREGION_2)
//REGION_2 selected
#define ubDEST_Y ubBOT_Y
#define ubDEST_U ubBOT_U
#define ubDEST_V ubBOT_V
#endif
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_P208.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL8x8_Save_P208.asm
//
// Save entire current planar frame data block of size 16x8
//---------------------------------------------------------------
//  Symbols needed to be defined before including this module
//
//      DWORD_ALIGNED_DEST: only if DEST_Y, DEST_U, DEST_V data are DWord aligned
//      ORIX:
//---------------------------------------------------------------
//
// Writes the Y block (unless SAVE_UV_ONLY is defined) and then the interleaved
// U/V block through the data port. Unlike the IMC3/NV12 save kernels there is
// no mask test or read-back/merge step in this module.

#include "PL8x8_Save_P208.inc"

    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

#if !defined(SAVE_UV_ONLY)
    // Save current planar frame Y block data (16x8) -------------------------------
    mov (2)     mMSGHDR.0<1>:d      wORIX<2;2,1>:w              // Block origin
    mov (1)     mMSGHDR.2<1>:ud     nDPW_BLOCK_SIZE_Y:ud        // Block width and height (16x8)

// NOTE: this label sits inside the SAVE_UV_ONLY guard and nothing in this
// module jumps to it.
WritePlanarToDataPort:
    // Pack two word-aligned Y rows (%1, %1+1) into each payload register (%2)
    $for(0,0; <nY_NUM_OF_ROWS; 2,1) {
        mov (16)    mubMSGPAYLOAD(%2,0)<1>      ub2DEST_Y(%1)REGION(16,2)
        mov (16)    mubMSGPAYLOAD(%2,16)<1>     ub2DEST_Y(%1+1)REGION(16,2)
    }
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_Y+nBI_DESTINATION_Y:ud
#endif

    //** Save 8x8 packed U and V -----------------------------------------------------
    // we could write directly wORIX to mMSGHDR and then execute asr on it, that way we could
    // avoid using rMSGSRC as a buffer and have one command less in code, but it is unknown whether
    // it is possible to do asr on mMSGHDR so we use rMSGSRC.
    mov (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w              // Block origin
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_UV:ud       // U/V block width and height (16x8, see PL8x8_Save_P208.inc)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Interleave U (even payload bytes) and V (odd payload bytes); %2 indexes
    // both the payload register and the U/V source register.
    $for(0,0; <nY_NUM_OF_ROWS;2,1) {
        mov (16)    mubMSGPAYLOAD(%2,0)<2>      ub2DEST_U(%2)REGION(16,2)
        mov (16)    mubMSGPAYLOAD(%2,1)<2>      ub2DEST_V(%2)REGION(16,2)
    }
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_UV+nBI_DESTINATION_UV:ud

// End of PL8x8_Save_P208.asm
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_P208.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//Module name: PL8x8_Save_P208.inc
//
// Setup for storing planar data
//
// Defines the data-port write block/message sizes used by PL8x8_Save_P208.asm
// and aliases the generic SRC_*/DEST_* symbols onto TOP_*/BOT_* per nSRC_REGION.
//
#include "undefall.inc" //Undefine the SRC and DEST symbols
#define nDPW_BLOCK_SIZE_Y nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // Y block size 16x8
#define nDPW_MSG_SIZE_Y nMSGLEN_4 // # of MRF's to hold Y block data (4)
#define nDPW_BLOCK_SIZE_UV nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8 // U/V interleaved block width and height (16x8)
#define nDPW_MSG_SIZE_UV nMSGLEN_4 // # of MRF's to hold U/V block data (4)
// nSRC_REGION == nREGION_1: SRC_* symbols alias the BOT_* names, DEST_* alias the TOP_* names
#if (nSRC_REGION==nREGION_1)
#define udSRC_Y udBOT_Y_IO
#define udSRC_U udBOT_U_IO
#define udSRC_V udBOT_V_IO
#define ubSRC_Y ubBOT_Y
#define ubSRC_U ubBOT_U
#define ubSRC_V ubBOT_V
#define uwSRC_U uwBOT_U //For masking operation
#define uwSRC_V uwBOT_V
#define ub2DEST_Y ub2TOP_Y
#define ub2DEST_U ub2TOP_U
#define ub2DEST_V ub2TOP_V
// nSRC_REGION == nREGION_2: the mapping is swapped (SRC_* -> TOP_*, DEST_* -> BOT_*)
#elif (nSRC_REGION==nREGION_2)
#define udSRC_Y udTOP_Y_IO
#define udSRC_U udTOP_U_IO
#define udSRC_V udTOP_V_IO
#define ubSRC_Y ubTOP_Y
#define ubSRC_U ubTOP_U
#define ubSRC_V ubTOP_V
#define uwSRC_U uwTOP_U //For masking operation
#define uwSRC_V uwTOP_V
#define ub2DEST_Y ub2BOT_Y
#define ub2DEST_U ub2BOT_U
#define ub2DEST_V ub2BOT_V
#endif
///* Yoni - masking is not relevant for ILK?!?
//#define TEMP0 REG(r,54)
//.declare TEMP Base=TEMP0 ElementSize=2 SrcRegion=<8;8,1> Type=uw
///* Yoni - masking is not relevant for ILK?!?
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_PA.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL8x8_Save_PA.asm
//
// Save planar YUV422 to packed YUV422 format data
//
// Note: SRC_* must reference to regions with data type "BYTE"
//       in order to save to byte-aligned byte location
//
// Flow:
//   1. Scatter the planar Y, U and V bytes into the packed DEST_YUV region
//      using indirect (address-register) destinations.
//   2. If the low 24 bits of udBLOCK_MASK are all set, jump to
//      WritePackedToDataPort; otherwise read back the packed 32x8 block and
//      restore every word whose mask bit is clear.
//   3. Copy the packed block to the message payload and write it out.

#include "PL8x8_Save_PA.inc"

    // Y/U/V byte offsets inside a packed register, rebased onto the DEST_YUV registers
    add (4)     pCF_Y_OFFSET<1>:uw  ubDEST_CF_OFFSET<4;4,1>:ub  nDEST_YUV_REG*nGRFWIB:w    // Initial Y,U,V offset in YUV422 block

    // Pack Y
    // one Y byte every 2 bytes of the packed row
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (16)    r[pCF_Y_OFFSET, %1*nGRFWIB]<2>      ubSRC_Y(0,%1*32)
    }

    // Pack U/V
    // one U and one V byte every 4 bytes of the packed row
    $for(0; <nUV_NUM_OF_ROWS; 1) {
        mov (8)     r[pCF_U_OFFSET, %1*nGRFWIB]<4>      ubSRC_U(0,%1*16)
        mov (8)     r[pCF_V_OFFSET, %1*nGRFWIB]<4>      ubSRC_V(0,%1*16)
    }

    shl (1)     rMSGSRC.0<1>:d      wORIX<0;1,0>:w    1:w       { NoDDClr }             // H. block origin need to be doubled
    mov (1)     rMSGSRC.1<1>:d      wORIY<0;1,0>:w              { NoDDClr, NoDDChk }    // Block origin
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_YUV:ud      { NoDDChk }             // Block width and height (32x8)
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    //Use the mask to determine which pixels shouldn't be over-written
    and (1)         acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud    0x00FFFFFF:ud
    cmp.ge.f0.0 (1) dNULLREG        acc0.0<0;1,0>:ud          0x00FFFFFF:ud    //Check if all pixels in the block need to be modified
    (f0.0) jmpi     WritePackedToDataPort

    //If mask is not all 1's, then load the entire 32x8 block
    //so that only those bytes may be modified that need to be (using the mask)

    // Load 32x8 packed YUV 422 ----------------------------------------------------
    send (8)    udSRC_YUV(0)<1>     mMSGHDR    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_DESTINATION_YUV:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    //Merge the data
    // Build one mask word per row in rMASK_TEMP: the horizontal mask for rows
    // whose bit is set in the vertical mask, 0 for rows whose bit is clear.
    mov (1)         f0.0:uw             ubBLOCK_MASK_V:ub       //Load the mask on flag reg
    (f0.0)  mov (8) rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw
    (-f0.0) mov (8) rMASK_TEMP<1>:uw    0:uw

    // Destination is Byte aligned
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (1)             f0.1:uw             uwMASK_TEMP(0,%1)<0;1,0>
        (-f0.1) mov (16)    uwDEST_YUV(%1)<1>   uwSRC_YUV(%1)       //check the UV merge - vK
    }

WritePackedToDataPort:
    // Packed YUV data are stored in one of the I/O regions before moving to MRF
    // Note: This is necessary since indirect addressing is not supported for MRF.
    // Packed data block should be saved as 32x8 pixel block
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>        udDEST_YUV(%1)REGION(8,1)
    }
    send (8)    dNULLREG    mMSGHDR    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_YUV+nBI_DESTINATION_YUV:ud

// End of PL8x8_Save_PA
i965_drv_video/shaders/post_processing/Common/PL8x8_Save_PA.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: PL8x8_Save_PA.inc
//
// Setup for storing packed data
//
// Defines the data-port write/read sizes used by PL8x8_Save_PA.asm, declares
// the per-row mask scratch register, and selects via nSRC_REGION which of the
// TOP_*/BOT_* names act as planar source and packed destination.
//
#include "undefall.inc" //Undefine the SRC and DEST symbols
// For saving
#define nDPW_BLOCK_SIZE_YUV nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8 // YUV block size 32x8
#define nDPW_MSG_SIZE_YUV nMSGLEN_8 // # of MRF's to hold YUV block data (8)
// For masking
#undef nDPR_MSG_SIZE_YUV
#define nDPR_MSG_SIZE_YUV nRESLEN_8 // # of MRF's to hold YUV block data (8)
// Scratch register holding one 16-bit mask word per row during the merge
#define rMASK_TEMP REG(r,nTEMP0)
.declare uwMASK_TEMP Base=rMASK_TEMP ElementSize=2 SrcRegion=<8;8,1> Type=uw //1 GRF
#if (nSRC_REGION==nREGION_1)
// Packed regions: udSRC_YUV receives the read-back block, udDEST_YUV holds the
// packed block that is written out, nDEST_YUV_REG is its base register number
#define udSRC_YUV udTOP_Y_IO
#define udDEST_YUV udBOT_Y_IO
#define nDEST_YUV_REG nBOT_Y
// Planar byte sources for packing, and word views of the packed regions used
// by the masked merge
#define ubSRC_Y ub2TOP_Y
#define ubSRC_U ub2TOP_U
#define ubSRC_V ub2TOP_V
#define uwSRC_YUV uwTOP_Y
#define uwDEST_YUV uwBOT_Y
#elif (nSRC_REGION==nREGION_2)
// Packed regions: udSRC_YUV receives the read-back block, udDEST_YUV holds the
// packed block that is written out, nDEST_YUV_REG is its base register number
#define udSRC_YUV udBOT_Y_IO
#define udDEST_YUV udTOP_Y_IO
#define nDEST_YUV_REG nTOP_Y
// Planar byte sources for packing, and word views of the packed regions used
// by the masked merge
#define ubSRC_Y ub2BOT_Y
#define ubSRC_U ub2BOT_U
#define ubSRC_V ub2BOT_V
#define uwSRC_YUV uwBOT_Y
#define uwDEST_YUV uwTOP_Y
#endif
i965_drv_video/shaders/post_processing/Common/PL9x5_PL16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL9x5_PL16x8.asm
//
// Chroma upconversion done in place on uwDEST_U / uwDEST_V: a horizontal pass
// over each 16-word row, followed by a vertical pass that spreads the rows to
// a 32-word pitch and fills the rows in between with averages.
//
// Horizontal pass region pairing:
//   src0 <1;2,0> yields s0, s0, s1, s1, ...   src1 <1;2,1> yields s0, s1, s1, s2, ...
// so each row becomes s0, avg(s0,s1), s1, avg(s1,s2), ...

#define EXPAND_9x5
#include "Expansion.inc"

//------------------------------ Horizontal Upconversion -----------------------------
$for(nUV_NUM_OF_ROWS-2; >-1; -1) {
    avg.sat (16) uwDEST_U(0, %1*16)<1>      uwDEST_U(0, %1*16)<1;2,0>   uwDEST_U(0, %1*16)<1;2,1>
    avg.sat (16) uwDEST_V(0, %1*16)<1>      uwDEST_V(0, %1*16)<1;2,0>   uwDEST_V(0, %1*16)<1;2,1>
}

// From here on the row count is redefined to 8, both for the loop below and
// for any code assembled after this module.
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS     8       //use packed version of all post-processing kernels

//------------------------------- Vertical Upconversion ------------------------------
// The last interpolated row (from input rows 3 and 4) is produced first; the
// loop then walks %1 downwards so each input row is read before the wider
// output layout overwrites it.
    avg.sat (16) uwDEST_U(0, 3*32+16)<1>    uwDEST_U(0, 3*16)   uwDEST_U(0, (1+3)*16)
    avg.sat (16) uwDEST_V(0, 3*32+16)<1>    uwDEST_V(0, 3*16)   uwDEST_V(0, (1+3)*16)

$for(nUV_NUM_OF_ROWS/2-2; >-1; -1) {
    // move input row %1+1 to its output slot, then interpolate between rows %1 and %1+1
    mov (16)     uwDEST_U(0, (1+%1)*32)<1>  uwDEST_U(0, (1+%1)*16)
    avg.sat (16) uwDEST_U(0, %1*32+16)<1>   uwDEST_U(0, %1*16)  uwDEST_U(0, (1+%1)*16)

    mov (16)     uwDEST_V(0, (1+%1)*32)<1>  uwDEST_V(0, (1+%1)*16)
    avg.sat (16) uwDEST_V(0, %1*32+16)<1>   uwDEST_V(0, %1*16)  uwDEST_V(0, (1+%1)*16)
}

// End of PL9x5_PL16x8
i965_drv_video/shaders/post_processing/Common/PL9x8_PL16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: PL9x8_PL16x8.asm
//
// Horizontal chroma upconversion, done in place on every 16-word row of
// uwDEST_U / uwDEST_V (row %1 lives at word offset %1*16).
//
// src0 <1;2,0> yields s0, s0, s1, s1, ... and src1 <1;2,1> yields
// s0, s1, s1, s2, ..., so each output row is s0, avg(s0,s1), s1, avg(s1,s2), ...

#include "Expansion.inc"

//------------------------------ Horizontal Upconversion -----------------------------
$for(0; <nUV_NUM_OF_ROWS; 1) {
    avg.sat (16) uwDEST_U(0, %1*16)<1>      uwDEST_U(0, %1*16)<1;2,0>   uwDEST_U(0, %1*16)<1;2,1>
    avg.sat (16) uwDEST_V(0, %1*16)<1>      uwDEST_V(0, %1*16)<1;2,0>   uwDEST_V(0, %1*16)<1;2,1>
}

// End of PL9x8_PL16x8
\ No newline at end of file
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: RGB16x8_Save_RGB.asm
//
// Save packed ARGB 444 frame data block of size 16x8
//
// To save 16x8 block (64x8 byte layout for ARGB8888) we need 2 send instructions
//  ---------
//  | 1 | 2 |
//  ---------
//
// Outline:
//   1. Build the data-port message header (byte X origin, Y origin, block size).
//   2. When the block mask is not completely set, read back the affected half
//      of the destination and restore the pixels whose mask bit is clear.
//   3. Write both 32x8-byte halves to the destination surface.

#include "RGB16x8_Save_RGB.inc"

    // ---- Message header ---------------------------------------------------------
    shl (1)     rMSGSRC.0<1>:d      wORIX<0;1,0>:w      2:w     { NoDDClr }             // H. block origin needs to be quadrupled (4 bytes per pixel)
    mov (1)     rMSGSRC.1<1>:d      wORIY<0;1,0>:w              { NoDDClr, NoDDChk }    // Block origin (1st quadrant)
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_ARGB:ud     { NoDDChk }             // Block width and height (32x8)

    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // ---- Use the mask to determine which pixels shouldn't be over-written -------
    // The low 24 bits of udBLOCK_MASK hold the horizontal (16-bit) and vertical
    // (8-bit) masks; when all of them are set no merge is needed.
    and (1)             acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud  0x00FFFFFF:ud
    cmp.ge.f0.0 (1)     dNULLREG        acc0.0<0;1,0>:ud        0x00FFFFFF:ud   // Check if all pixels in the block need to be modified
    (f0.0) jmpi WriteARGBToDataPort

    // If mask is not all 1's, then load the entire 64x8 block
    // so that only those bytes may be modified that need to be (using the mask)

    // ---- Load first block 16x8 packed ARGB 444 ----------------------------------
    or (1)              acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud  0xFF00FF00:ud   // Check first block
    cmp.e.f0.0 (1)      dNULLREG        acc0.0<0;1,0>:ud        0xFFFFFFFF:ud
    (f0.0) jmpi SkipFirstBlockMerge                                             // If full mask then skip this block

    send (8)    udSRC_ARGB(0)<1>    mMSGHDR     udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Merge the data
    // rMASK_TEMP word i = horizontal mask when vertical mask bit i is set, else 0.
    mov (1)             f0.0:uw             ubBLOCK_MASK_V:ub       // Load the mask on flag reg
    (f0.0)  mov (8)     rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw       // use sel instruction - vK
    (-f0.0) mov (8)     rMASK_TEMP<1>:uw    0:uw

    $for(0, 0; <nY_NUM_OF_ROWS; 1, 2) {     // take care of the lines in the block, they are different in the src and dest
        mov (1)             f0.1:uw             uwMASK_TEMP(0,%1)<0;1,0>
        (-f0.1) mov (8)     udDEST_ARGB(%2)<1>  udSRC_ARGB(%1)          // masked-off pixels keep the data read back from the surface
    }

SkipFirstBlockMerge:
    // ---- Load second block 16x8 packed ARGB 444 ---------------------------------
    or (1)              acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud  0xFF0000FF:ud   // Check second block
    cmp.e.f0.0 (1)      dNULLREG        acc0.0<0;1,0>:ud        0xFFFFFFFF:ud
    (f0.0) jmpi WriteARGBToDataPort                                             // If full mask then skip this block

    add (1)     mMSGHDR.0<1>:d      rMSGSRC.0<0;1,0>:d      32:d    // Point to 2nd part
    send (8)    udSRC_ARGB(0)<1>    mMSGHDR     udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud               // Point to 1st part again

    // Merge the data
    mov (1)             f0.0:uw             ubBLOCK_MASK_V:ub               // Load the mask on flag reg
    (f0.0)  shr (8)     rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw   8:uw        // load the mask for second block
    (-f0.0) mov (8)     rMASK_TEMP<1>:uw    0:uw

    $for(0, 1; <nY_NUM_OF_ROWS; 1, 2) {     // take care of the lines in the block, they are different in the src and dest
        mov (1)             f0.1:uw             uwMASK_TEMP(0,%1)<0;1,0>
        (-f0.1) mov (8)     udDEST_ARGB(%2)<1>  udSRC_ARGB(%1)
    }

WriteARGBToDataPort:
    // ---- Move packed data to MRF and output -------------------------------------
    // First half: even udDEST_ARGB registers.
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_ARGB(%1*2)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud

    // Second half: odd udDEST_ARGB registers, written 32 bytes to the right.
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    add (1)     mMSGHDR.0<1>:d      rMSGSRC.0<0;1,0>:d      32:d    // Point to 2nd part
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_ARGB(%1*2+1)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud

// End of RGB16x8_Save_RGB
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: RGB16x8_Save_RGB.inc
//
// Symbols used by RGB16x8_Save_RGB.asm: data-port message sizes, the
// mask scratch register and the source/destination register aliases.

// Undefine the SRC and DEST symbols
#include "undefall.inc"

//---------- For saving ----------
// ARGB block size 32x8
#define nDPW_BLOCK_SIZE_ARGB    nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8
// # of MRF's to hold ARGB block data (8)
#define nDPW_MSG_SIZE_ARGB      nMSGLEN_8

//---------- For masking ----------
// Response length requested when reading the block back (nRESLEN_8)
#undef  nDPR_MSG_SIZE_ARGB
#define nDPR_MSG_SIZE_ARGB      nRESLEN_8

// Scratch register holding the per-row masks (1 GRF)
#define rMASK_TEMP              REG(r,nTEMP0)
.declare    uwMASK_TEMP     Base=rMASK_TEMP     ElementSize=2   SrcRegion=<8;8,1>   Type=uw

#if (nSRC_REGION==nREGION_1)
    // For saving: the output of the previous stage is stored here; it is
    // modified and then written to the render cache
    #define udDEST_ARGB     udTOP_Y_IO
    // For masking: holds the destination data that shouldn't be modified
    #define udSRC_ARGB      udBOT_Y_IO
#elif (nSRC_REGION==nREGION_2)
    // For saving: the output of the previous stage is stored here; it is
    // modified and then written to the render cache
    #define udDEST_ARGB     udBOT_Y_IO
    // For masking: holds the destination data that shouldn't be modified
    #define udSRC_ARGB      udTOP_Y_IO
#endif
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB16.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: RGB16x8_Save_RGB16.asm
//
// Save packed RGB565 frame data block of size 16x8
//
// To save 16x8 block (32x8 byte layout for RGB565) we need 1 send instruction
//  -----
//  | 1 |
//  -----

#include "RGB16x8_Save_RGB16.inc"

// ---- Convert 32 bit RGB to 16 bit RGB -------------------------------------------
// Each A8R8G8B8 pixel is truncated to R5G6B5: the 5 MSBs of R and B and the
// 6 MSBs of G are kept; the alpha byte is shifted out and not stored.
// The first shr carries no explicit execution size and therefore relies on
// the .default_execution_size (16) directive from common.inc.
$for(0, 0; <nY_NUM_OF_ROWS; 1, 2) {
    shr         uwCSC_TEMP(%1,0)<1>     ubDEST_ARGB(%2,0)<32;8,4>   3:w     // B >> 3

    shl (16)    uwTEMP_RGB16(0)<1>      uwDEST_ARGB(%2,1)<16;8,2>   8:w     // R << 8
    and (16)    uwTEMP_RGB16(0)<1>      uwTEMP_RGB16(0)             0xF800:uw
    or (16)     uwCSC_TEMP(%1,0)<1>     uwCSC_TEMP(%1,0)<16;16,1>   uwTEMP_RGB16(0)

    shr (16)    uwTEMP_RGB16(0)<1>      uwDEST_ARGB(%2,0)<16;8,2>   5:w     // G >> 5
    and (16)    uwTEMP_RGB16(0)<1>      uwTEMP_RGB16(0)             0x07E0:uw
    or (16)     uwCSC_TEMP(%1,0)<1>     uwCSC_TEMP(%1,0)<16;16,1>   uwTEMP_RGB16(0)
}

    // ---- Message header ---------------------------------------------------------
    mov (2)     rMSGSRC.0<1>:d      wORIX<2;2,1>:w                  // Block origin (1st quadrant)
    shl (1)     rMSGSRC.0<1>:d      wORIX<0;1,0>:w      1:w         // H. block origin needs to be doubled for byte offset
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_RGB16:ud        // Block width and height (32x8)

    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // ---- Use the mask to determine which pixels shouldn't be over-written -------
    and (1)             acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud  0x00FFFFFF:ud
    cmp.ge.f0.0 (1)     dNULLREG        acc0.0<0;1,0>:ud        0x00FFFFFF:ud   // Check if all pixels in the block need to be modified
    (f0.0) jmpi WriteRGB16ToDataPort

    // If mask is not all 1's, then load the entire 32x8 block
    // so that only those bytes may be modified that need to be (using the mask)

    // ---- Load 32x8 packed RGB565 ------------------------------------------------
    send (8)    udSRC_RGB16(0)<1>   mMSGHDR     udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_RGB16+nBI_DESTINATION_RGB:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Merge the data
    // rMASK_TEMP word i = horizontal mask when vertical mask bit i is set, else 0.
    mov (1)             f0.0:uw             ubBLOCK_MASK_V:ub       // Load the mask on flag reg
    (f0.0)  mov (8)     rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw       // use sel instruction - vK
    (-f0.0) mov (8)     rMASK_TEMP<1>:uw    0:uw

    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (1)             f0.1:uw             uwMASK_TEMP(0,%1)<0;1,0>
        (-f0.1) mov (16)    uwCSC_TEMP(%1)<1>   uwSRC_RGB16(%1)     // masked-off pixels keep the data read back from the surface
    }

WriteRGB16ToDataPort:
    // ---- Move packed data to MRF and output -------------------------------------
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udCSC_TEMP(%1)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_RGB16+nBI_DESTINATION_RGB:ud

// End of RGB16x8_Save_RGB16
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_RGB16.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: RGB16x8_Save_RGB16.inc
//
// Symbols used by RGB16x8_Save_RGB16.asm: data-port message sizes,
// scratch registers and the source/destination register aliases.

// Undefine the SRC and DEST symbols
#include "undefall.inc"

//---------- For saving ----------
// RGB16 block size 32x8
#define nDPW_BLOCK_SIZE_RGB16   nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8
// # of MRF's to hold RGB16 block data (8)
#define nDPW_MSG_SIZE_RGB16     nMSGLEN_8

//---------- For conversion to 16bit ----------
// Scratch row used while assembling the R and G fields (1 GRF)
.declare    uwTEMP_RGB16    Base=REG(r,nTEMP1)  ElementSize=2   SrcRegion=<16;16,1> Type=uw

//---------- For masking ----------
// Response length requested when reading the block back (nRESLEN_8)
#undef  nDPR_MSG_SIZE_RGB16
#define nDPR_MSG_SIZE_RGB16     nRESLEN_8

// Scratch register holding the per-row masks (1 GRF)
#define rMASK_TEMP              REG(r,nTEMP0)
.declare    uwMASK_TEMP     Base=rMASK_TEMP     ElementSize=2   SrcRegion=<8;8,1>   Type=uw

#if (nSRC_REGION==nREGION_1)
    // For saving: data from previous module, and the data converted to 16 bits
    #define ubDEST_ARGB     ubTOP_Y
    #define uwDEST_ARGB     uwTOP_Y
    #define udCSC_TEMP      udBOT_Y_IO
    #define uwCSC_TEMP      uwBOT_Y
    // For masking: holds the destination data that shouldn't be modified
    #define udSRC_RGB16     udTOP_Y_IO
    #define uwSRC_RGB16     uwTOP_Y
#elif (nSRC_REGION==nREGION_2)
    // For saving: data from previous module, and the data converted to 16 bits
    #define ubDEST_ARGB     ubBOT_Y
    #define uwDEST_ARGB     uwBOT_Y
    #define udCSC_TEMP      udTOP_Y_IO
    #define uwCSC_TEMP      uwTOP_Y
    // For masking: holds the destination data that shouldn't be modified
    #define udSRC_RGB16     udBOT_Y_IO
    #define uwSRC_RGB16     uwBOT_Y
#endif
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_Y416.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: RGB16x8_Save_Y416.asm
//
// Save packed ARGB 444 frame data block of size 16x8
//
// To save 16x8 block (128x8 byte layout for ARGB 16bit per component) we need 4 send instructions
//  -----------------
//  | 1 | 2 | 3 | 4 |
//  -----------------

// NOTE(review): this includes the RGB header rather than RGB16x8_Save_Y416.inc;
// the two headers define the same symbols, so the include is left unchanged.
#include "RGB16x8_Save_RGB.inc"

    // ---- Message header ---------------------------------------------------------
    shl (1)     rMSGSRC.0<1>:d      wORIX<0;1,0>:w      3:w     { NoDDClr }             // H. block origin needs to become 8 times (8 bytes per pixel)
    mov (1)     rMSGSRC.1<1>:d      wORIY<0;1,0>:w              { NoDDClr, NoDDChk }    // Block origin (1st quadrant)
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_ARGB:ud     { NoDDChk }             // Block width and height (32x8)

    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

/* Not needed for validation kernels for now -vK
    // Use the mask to determine which pixels shouldn't be over-written
    and (1)             acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud  0x00FFFFFF:ud
    cmp.ge.f0.0 (1)     dNULLREG        acc0.0<0;1,0>:ud        0x00FFFFFF:ud   //Check if all pixels in the block need to be modified
    (f0.0) jmpi WriteARGBToDataPort

    // If mask is not all 1's, then load the entire 64x8 block
    // so that only those bytes may be modified that need to be (using the mask)

    // Load first block 16x8 packed ARGB 444 ---------------------------------------
    or (1)              acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud  0xFF00FF00:ud   //Check first block
    cmp.e.f0.0 (1)      dNULLREG        acc0.0<0;1,0>:ud        0xFFFFFFFF:ud
    (f0.0) jmpi SkipFirstBlockMerge                                             // If full mask then skip this block

    send (8)    udSRC_ARGB(0)<1>    mMSGHDR     udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud

    // Merge the data
    mov (1)             f0.0:uw             ubBLOCK_MASK_V:ub       // Load the mask on flag reg
    (f0.0)  mov (8)     rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw       // use sel instruction - vK
    (-f0.0) mov (8)     rMASK_TEMP<1>:uw    0:uw

    $for(0, 0; <nY_NUM_OF_ROWS; 1, 2) { //take care of the lines in the block, they are different in the src and dest
        mov (1)             f0.1:uw             uwMASK_TEMP(0,%1)<0;1,0>
        (-f0.1) mov (8)     udDEST_ARGB(%2)<1>  udSRC_ARGB(%1)
    }

SkipFirstBlockMerge:
    // Load second block 16x8 packed ARGB 444 ---------------------------------------
    or (1)              acc0.0<1>:ud    udBLOCK_MASK<0;1,0>:ud  0xFF0000FF:ud   //Check second block
    cmp.e.f0.0 (1)      dNULLREG        acc0.0<0;1,0>:ud        0xFFFFFFFF:ud
    (f0.0) jmpi WriteARGBToDataPort                                             // If full mask then skip this block

    add (1)     mMSGHDR.0<1>:d      rMSGSRC.0<0;1,0>:d      32:d    // Point to 2nd part
    send (8)    udSRC_ARGB(0)<1>    mMSGHDR     udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nDPR_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud               // Point to 1st part again

    // Merge the data
    mov (1)             f0.0:uw             ubBLOCK_MASK_V:ub               // Load the mask on flag reg
    (f0.0)  shr (8)     rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw   8:uw        // load the mask for second block
    (-f0.0) mov (8)     rMASK_TEMP<1>:uw    0:uw

    $for(0, 1; <nY_NUM_OF_ROWS; 1, 2) { //take care of the lines in the block, they are different in the src and dest
        mov (1)             f0.1:uw             uwMASK_TEMP(0,%1)<0;1,0>
        (-f0.1) mov (8)     udDEST_ARGB(%2)<1>  udSRC_ARGB(%1)
    }
*/

WriteARGBToDataPort:
    // ---- Move packed data to MRF and output -------------------------------------
    // Each row of the block occupies four consecutive udDEST_ARGB registers;
    // write k (k = 0..3) sends register 4*row+k at byte offset 32*k.

    // Write 1st 4X8 pixels
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_ARGB(%1*4)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud

    // Write 2nd 4X8 pixels
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    add (1)     mMSGHDR.0<1>:d      rMSGSRC.0<0;1,0>:d      32:d    // Point to 2nd part
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_ARGB(%1*4+1)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud

    // Write 3rd 4X8 pixels
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    add (1)     mMSGHDR.0<1>:d      rMSGSRC.0<0;1,0>:d      64:d    // Point to 3rd part
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_ARGB(%1*4+2)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud

    // Write 4th 4X8 pixels
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    add (1)     mMSGHDR.0<1>:d      rMSGSRC.0<0;1,0>:d      96:d    // Point to 4th part
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_ARGB(%1*4+3)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud

// End of RGB16x8_Save_Y416
i965_drv_video/shaders/post_processing/Common/RGB16x8_Save_Y416.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: RGB16x8_Save_Y416.inc
//
// Data-port message sizes, the mask scratch register and the
// source/destination register aliases for saving a Y416 block.

// Undefine the SRC and DEST symbols
#include "undefall.inc"

//---------- For saving ----------
// ARGB block size 32x8
#define nDPW_BLOCK_SIZE_ARGB    nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8
// # of MRF's to hold ARGB block data (8)
#define nDPW_MSG_SIZE_ARGB      nMSGLEN_8

//---------- For masking ----------
// Response length requested when reading the block back (nRESLEN_8)
#undef  nDPR_MSG_SIZE_ARGB
#define nDPR_MSG_SIZE_ARGB      nRESLEN_8

// Scratch register holding the per-row masks (1 GRF)
#define rMASK_TEMP              REG(r,nTEMP0)
.declare    uwMASK_TEMP     Base=rMASK_TEMP     ElementSize=2   SrcRegion=<8;8,1>   Type=uw

#if (nSRC_REGION==nREGION_1)
    // For saving: the output of the previous stage is stored here; it is
    // modified and then written to the render cache
    #define udDEST_ARGB     udTOP_Y_IO
    // For masking: holds the destination data that shouldn't be modified
    #define udSRC_ARGB      udBOT_Y_IO
#elif (nSRC_REGION==nREGION_2)
    // For saving: the output of the previous stage is stored here; it is
    // modified and then written to the render cache
    #define udDEST_ARGB     udBOT_Y_IO
    // For masking: holds the destination data that shouldn't be modified
    #define udSRC_ARGB      udTOP_Y_IO
#endif
i965_drv_video/shaders/post_processing/Common/RGB_Pack.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Planar-to-packed conversion: interleaves the B, G, R and A component
// planes (16 word samples per row, 8 rows each) into 4-byte pixels in
// DEST_ARGB.
.declare SRC_B      Base=REG(r,10)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw
.declare SRC_G      Base=REG(r,18)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw
.declare SRC_R      Base=REG(r,26)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw
.declare SRC_A      Base=REG(r,34)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw

#define DEST_ARGB       ubBOT_ARGB

// The packed result lives in region 2, so later modules must read from there.
#undef  nSRC_REGION
#define nSRC_REGION     nREGION_2

// Pack directly to mrf as optimization - vK

// %1 walks the source rows (step 1), %2 walks the destination registers
// (step 2): pixels 0-7 of a row go to DEST_ARGB(%2), pixels 8-15 to
// DEST_ARGB(%2+1).  The byte destination stride of 4 places each component
// at its own byte offset inside the pixel.
$for(0, 0; <8; 1, 2) {
    // Earlier 16-wide compressed form, kept for reference:
//  mov (16)    DEST_ARGB(%2,0)<4>      SRC_B(%1)       { Compr, NoDDClr }              // 16 B
//  mov (16)    DEST_ARGB(%2,1)<4>      SRC_G(%1)       { Compr, NoDDClr, NoDDChk }     // 16 G
//  mov (16)    DEST_ARGB(%2,2)<4>      SRC_R(%1)       { Compr, NoDDClr, NoDDChk }     // 16 R     //these 2 inst can be merged - vK
//  mov (16)    DEST_ARGB(%2,3)<4>      SRC_A(%1)       { Compr, NoDDChk }      //DEST_RGB_FORMAT<0;1,0>:ub { Compr, NoDDChk }      // 16 A

    // Pixels 0-7 of the row
    mov (8)     DEST_ARGB(%2,0)<4>      SRC_B(%1)       { NoDDClr }                     // 8 B
    mov (8)     DEST_ARGB(%2,1)<4>      SRC_G(%1)       { NoDDClr, NoDDChk }            // 8 G
    mov (8)     DEST_ARGB(%2,2)<4>      SRC_R(%1)       { NoDDClr, NoDDChk }            // 8 R
    mov (8)     DEST_ARGB(%2,3)<4>      SRC_A(%1)       { NoDDChk }                     // 8 A

    // Pixels 8-15 of the row
    mov (8)     DEST_ARGB(%2+1,0)<4>    SRC_B(%1,8)     { NoDDClr }                     // 8 B
    mov (8)     DEST_ARGB(%2+1,1)<4>    SRC_G(%1,8)     { NoDDClr, NoDDChk }            // 8 G
    mov (8)     DEST_ARGB(%2+1,2)<4>    SRC_R(%1,8)     { NoDDClr, NoDDChk }            // 8 R
    mov (8)     DEST_ARGB(%2+1,3)<4>    SRC_A(%1,8)     { NoDDChk }                     // 8 A
}
i965_drv_video/shaders/post_processing/Common/SetupVPKernel.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: SetupVPKernel.asm
//
// Initial setup for running video-processing kernels
//

#include "common.inc"

//
// Now, begin source code....
//
.code

#include "Init_All_Regs.asm"

    // Initialize message payload header with R0
    mov (8)     rMSGSRC.0<1>:ud     r0.0<8;8,1>:ud

#if defined(INC_BLENDING)
    // StepX_ratio = AlphaStepX / VideoStepX, so AlphaStepX = StepX_ratio * VideoStepX.
    // fSCALING_STEP_RATIO and fALPHA_STEP_X are both defined as r3.1 in common.inc,
    // i.e. the ratio is replaced in place by the alpha step.
    mul (1)     fALPHA_STEP_X:f     fSCALING_STEP_RATIO:f   fVIDEO_STEP_X:f
#endif

// End of SetupVPKernel
i965_drv_video/shaders/post_processing/Common/common.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
#ifndef COMMON_INC
#define COMMON_INC
// Module name: common.inc
//
// Common header file for all Video-Processing kernels
//
.
default_execution_size
(
16
)
.
default_register_type
:
ub
.
reg_count_total
80
.
reg_count_payload
4
//========== Common constants ==========
// Bit position constants
#define BIT0 0x01
#define BIT1 0x02
#define BIT2 0x04
#define BIT3 0x08
#define BIT4 0x10
#define BIT5 0x20
#define BIT6 0x40
#define BIT7 0x80
#define BIT8 0x0100
#define BIT9 0x0200
#define BIT10 0x0400
#define BIT11 0x0800
#define BIT12 0x1000
#define BIT13 0x2000
#define BIT14 0x4000
#define BIT15 0x8000
#define BIT16 0x00010000
#define BIT17 0x00020000
#define BIT18 0x00040000
#define BIT19 0x00080000
#define BIT20 0x00100000
#define BIT21 0x00200000
#define BIT22 0x00400000
#define BIT23 0x00800000
#define BIT24 0x01000000
#define BIT25 0x02000000
#define BIT26 0x04000000
#define BIT27 0x08000000
#define BIT28 0x10000000
#define BIT29 0x20000000
#define BIT30 0x40000000
#define BIT31 0x80000000
#define nGRFWIB 32 // GRF register width in byte
#define nGRFWIW 16 // GRF register width in word
#define nGRFWID 8 // GRF register width in dword
#define nTOP_FIELD 0
#define nBOTTOM_FIELD 1
#define nPREVIOUS_FRAME 0 // Previous frame
#define nCURRENT_FRAME 1 // Current frame
#define nNEXT_FRAME 2 // Next frame
#ifdef GT
// GT DI Kernel
#else // ILK
// ILK DI Kernel
#endif
//===================================
//========== Macros ==========
#define REGION(Width,HStride) <Width*HStride;Width,HStride> // Region definition when ExecSize = Width
#define RegFile(a) a
#define REG(r,n) _REG(RegFile(r),n)
#define _REG(r,n) __REG(r,n)
#define __REG(r,n) r##n.0
#define REG2(r,n,s) _REG2(RegFile(r),n,s)
#define _REG2(r,n,s) __REG2(r,n,s)
#define __REG2(r,n,s) r##n.##s
#define dNULLREG null<1>:d
#define wNULLREG null<1>:w
#define KERNEL_ID(kernel_ID) mov NULLREG kernel_ID:ud
#define NODDCLR
#define NODDCLR_NODDCHK
#define NODDCHK
//#define NODDCLR { NoDDClr }
//#define NODDCLR_NODDCHK { NoDDClr, NoDDChk }
//#define NODDCHK { NoDDChk }
//========== Defines ====================
//========== GRF partition ==========
// r0 header : r0 (1 GRF)
// Static parameters : r1 - r4 (4 GRFS)
// Inline parameters : r5 - r6 (2 GRFs)
// MSGSRC : r9 (1 GRF)
// Top IO region : r10 - r33 (24 GRFS 8 for each component Y,U,V 16X8:w)
// Free space : r34 - r55 (22 GRFS)
// Bottom IO region : r56 - r79 (24 GRFS 8 for each component Y,U,V 16X8:w)
//===================================
//========== Static Parameters ==========
// r1
#define fPROCAMP_C0 r1.0 // DWORD 0, Procamp constant C0 in :f
#define wPROCAMP_C0 r1.0 // DWORD 0, Procamp constant C0 in :w
#define NUMBER_0002 r1.1 // DWORD 0, 0x0002 used in procamp for GT
#define udCP_MessageFormat r1.0 // DWORD 0, bits 2:3 of DWORD. (CE)
#define udCP_StatePointer r1.0 // DWORD 0, bits 31:5 of DWORD.(CE)
#define ubSRC_CF_OFFSET r1.4 // DWORD 1, byte 0-2. SRC packed color format YUV offset in :ub
#define ubDEST_RGB_FORMAT r1.8 // DWORD 2, byte 0. Dest RGB color format (0:ARGB FF:XRGB)
#define ubDEST_CF_OFFSET r1.8 // DWORD 2, byte 0-2. Dest packed color format YUV offset in :ub
#define fPROCAMP_C1 r1.3 // DWORD 3, Procamp constant C1 in :f
#define wPROCAMP_C1 r1.6 // DWORD 3, Procamp constant C1 in :w
#define NUMBER_0100 r1.7 // DWORD 3, 0x0100 used in procamp for GT
#define fPROCAMP_C2 r1.4 // DWORD 4, Procamp constant C2 in :f
#define wPROCAMP_C2 r1.8 // DWORD 4, Procamp constant C2 in :w
#define uwSPITCH_DIV2 r1.10 // DWORD 5, byte 0-1. statistics surface pitch divided by 2
#define fVIDEO_STEP_Y r1.6 // DWORD 6, :f, AVS normalized reciprocal of Y Scaling factor
#define ubSTMM_SHIFT r1.24 // DWORD 6, byte 0. Amount of right shift for the DI blending equation
#define ubSTMM_MIN r1.25 // DWORD 6, byte 1. Min STMM for DI blending equation
#define ubSTMM_MAX r1.26 // DWORD 6, byte 2. Max STMM for DI blending equation
#define ubTFLD_FIRST r1.27 // DWORD 6, byte 3. Field parity order
#define fPROCAMP_C5 r1.7 // DWORD 7, Procamp constant C3 in :f
#define wPROCAMP_C5 r1.14 // DWORD 7, Procamp constant C3 in :w
// r2
#define fPROCAMP_C3 r2.0 // DWORD 0, Procamp constant C4 in :f
#define wPROCAMP_C3 r2.0 // DWORD 0, Procamp constant C4 in :w
#define fCSC_C5 r2.2 // DWORD 2. WG+CSC constant C5
#define wCSC_C5 r2.4 // DWORD 2. WG+CSC constant C5
#define fPROCAMP_C4 r2.3 // DWORD 3, Procamp constant C5 in :f
#define wPROCAMP_C4 r2.6 // DWORD 3, Procamp constant C5 in :w
#define fCSC_C8 r2.4 // DWORD 4. WG+CSC constant C8
#define wCSC_C8 r2.8 // DWORD 4. WG+CSC constant C8
#define fCSC_C9 r2.7 // DWORD 7. WG+CSC constant C9
#define wCSC_C9 r2.14 // DWORD 7. WG+CSC constant C9
// r3
#define fCSC_C0 r3.0 // DWORD 0. WG+CSC constant C0
#define wCSC_C0 r3.0 // DWORD 0. WG+CSC constant C0
#define fSCALING_STEP_RATIO r3.1 // DWORD 1, = Alpha_X_Scaling_Step / Video_X_scaling_Step :f (blending)
#define fALPHA_STEP_X r3.1 // DWORD 1, = 1/Scale X, 0.5 = 2x, in :f (blending)
#define fALPHA_STEP_Y r3.2 // DWORD 2, = 1/Scale Y, in :f
#define fCSC_C4 r3.3 // DWORD 3. WG+CSC constant C4
#define wCSC_C4 r3.6 // DWORD 3. WG+CSC constant C4
#define fCSC_C1 r3.4 // DWORD 4. WG+CSC constant C1
#define wCSC_C1 r3.8 // DWORD 4. WG+CSC constant C1
#define wSRC_H_ORI_OFFSET r3.10 // DWORD 5, bytes 0,1 :w
#define wSRC_V_ORI_OFFSET r3.11 // DWORD 5, bytes 2,3 :w
#define dCOLOR_PIXEL r3.6 // DWORD 6. Color pixel for Colorfill
#define fCSC_C2 r3.6 // DWORD 6. WG+CSC constant C2
#define wCSC_C2 r3.12 // DWORD 6. WG+CSC constant C2
#define fCSC_C3 r3.7 // DWORD 7. WG+CSC constant C3
#define wCSC_C3 r3.14 // DWORD 7. WG+CSC constant C3
// r4
#define fCSC_C6 r4.0 // DWORD 0. WG+CSC constant C6
#define wCSC_C6 r4.0 // DWORD 0. WG+CSC constant C6
#define wFRAME_ENDX r4.2 // DWORD 1, word 0. Horizontal end = Origin+Width (in pixels)(for multiple blocks)
#define wNUM_BLKS r4.3 // DWORD 1, word 1. Number of blocks to process (for multiple blocks)
#define wCOPY_ORIX r4.5 // DWORD 2, word 1. A copy of X origin (for multiple blocks)
#define uwNLAS_ENABLE r4.4 // DWORD 2, bit 15, NLAS enable bit
#define fCSC_C7 r4.3 // DWORD 3. WG+CSC constant C7
#define wCSC_C7 r4.6 // DWORD 3. WG+CSC constant C7
#define fCSC_C10 r4.4 // DWORD 4. WG+CSC constant C10
#define wCSC_C10 r4.8 // DWORD 4. WG+CSC constant C10
#define fFRAME_VID_ORIX r4.5 // DWORD 5, Frame horizontal origin normalized for scale kernel
#define fFRAME_ALPHA_ORIX r4.6 // DWORD 6. Normalized alpha horiz origin for the frame
#define fCSC_C11 r4.7 // DWORD 7. WG+CSC constant C11
#define wCSC_C11 r4.14 // DWORD 7. WG+CSC constant C11
//========================================
//========== Inline parameters ===========
// r5
#define wORIX r5.0 // DWORD 0, byte 0-1. :w, Destination Block Horizontal Origin in pel
#define wORIY r5.1 // DWORD 0, byte 2-3. :w, Destination Block Vertical Origin in pel
#define fSRC_VID_H_ORI r5.1 // DWORD 1, :f, SRC Y horizontal origin normalized for scale kernel
#define fSRC_VID_V_ORI r5.2 // DWORD 2, :f, SRC Y vertical origin normalized for scale kernel
#define fSRC_ALPHA_H_ORI r5.3 // DWORD 3, :f, Normalized alpha horizontal origin
#define fSRC_ALPHA_V_ORI r5.4 // DWORD 4, :f, Normalized alpha vertical origin
#define uwALPHA_MASK_X r5.10 // DWORD 5, byte 0-1 :w, H. alpha mask
#define ubALPHA_MASK_Y r5.22 // DWORD 5, byte 2. :ub,V. alpha mask
#define ubBLK_CNT_X r5.23 // DWORD 5, byte 3, :ub, Horizontal Block Count per thread
#define udBLOCK_MASK r5.6 // DWORD 6
#define uwBLOCK_MASK_H r5.12 // DWORD 6, byte 0-1 :uw, Block horizontal mask used in non-DWord aligned kernels
#define ubBLOCK_MASK_V r5.26 // DWORD 6, byte 2 :ub, Block vertical mask used in non-DWord aligned kernels
#define ubNUM_BLKS r5.27 // DWORD 6, byte 3, :ub, Total Block Count per thread
#define fVIDEO_STEP_X r5.7 // DWORD 7. :f, AVS normalized reciprocal of X Scaling factor
// r6
#define fVIDEO_STEP_DELTA r6.0 // DWORD 0. :f, AVS normalized delta between 2 adjacent scaling steps (used for non-linear scaling)
//====================== Binding table =========================================
#if defined(DNDI)
// DNDI Surface Binding Table
//#define nBI_SRC_CURR 0 // Current input frame surface
//#define nBI_SRC_PRIV 1 // Denoised previous input frame surface
//#define nBI_SRC_STAT 2 // Statistics input surface (STMM / Noise motion history)
//#define nBI_DEST_1ST 3 // 1st deinterlaced output frame surface
// #define nBI_DEST_YUV 3 // Dest frame YUV (for DN only)
//#define nBI_DEST_Y 3 // Dest frame Y (for DN only)
//#define nBI_DEST_2ND 4 // 2nd deinterlaced output frame surface
//#define nBI_DEST_DN_CURR 6 // Denoised current output frame surface
//#define nBI_DEST_STAT 7 // Statistics output surface (STMM / Noise motion history)
// #define nBI_DEST_U 8 // Dest frame U (for DN only)
// #define nBI_DEST_V 9 // Dest frame V (for DN only)
// #define nBI_SRC_U 10 // Src frame U (for DN only)
// #define nBI_SRC_V 11 // Src frame V (for DN only)
// #define nBI_SRC_UV 10 // Current src frame for UV
#endif
#if defined(INPUT_PL3)
// PL3 Surface Binding Table
// #define nBI_SRC_ALPHA 0 // Alpha
// #define nBI_SRC_Y 1 // Current src frame
// #define nBI_SRC_U 2 // Current src frame
// #define nBI_SRC_V 3 // Current src frame
// #define nBI_DEST_Y 10 // Dest frame
// #define nBI_DEST_U 11 // Dest frame
// #define nBI_DEST_V 12 // Dest frame
// #define nBI_DEST_YUV 7 // Dest frame
// #define nBI_DEST_RGB 7 // same num as BI_DEST_YUV, never used at the same time
#endif
#if defined(INPUT_PL2)
// PL2 Surface Binding Table
// #define nBI_SRC_ALPHA 0 // Alpha
// #define nBI_SRC_Y 1 // Current src frame for Y + offseted UV
// #define nBI_SRC_YUV 1 // Current src frame for YUV in case of NV12_AVS
// #define nBI_SRC_UV 2 // Current src frame for UV
// #define nBI_DEST_YUV 7 // Current dest frame for Y + offseted UV
// #define nBI_DEST_RGB 7 // same num as BI_DEST_YUV, never used at the same time
// #define nBI_DEST_Y 10 // Dest frame
// #define nBI_DEST_U 11 // Dest frame
// #define nBI_DEST_V 12 // Dest frame
#endif
#if defined(INPUT_PA) || defined(COLORFILL)
// Packed Surface Binding Table
// #define nBI_SRC_ALPHA 0 // Alpha
// #define nBI_SRC_YUV 1 // Current src frame
// #define nBI_DEST_YUV 3 // Dest frame
// #define nBI_DEST_RGB 3 // same num as BI_DEST_YUV, never used at the same time
#endif
// Super binding table
#define nBI_ALPHA_SRC 0
#define nBI_CURRENT_SRC_YUV 1
#define nBI_FIELD_COPY_SRC_1_YUV 1
#define nBI_CURRENT_SRC_Y 1
#define nBI_FIELD_COPY_SRC_1_Y 1
#define nBI_CURRENT_SRC_RGB 1
#define nBI_CURRENT_SRC_UV 2
#define nBI_FIELD_COPY_SRC_1_UV 2
#define nBI_CURRENT_SRC_U 2
#define nBI_FIELD_COPY_SRC_1_U 2
#define nBI_CURRENT_SRC_V 3
#define nBI_FIELD_COPY_SRC_1_V 3
#define nBI_TEMPORAL_REFERENCE_YUV 4
#define nBI_FIELD_COPY_SRC_2_YUV 4
#define nBI_TEMPORAL_REFERENCE_Y 4
#define nBI_FIELD_COPY_SRC_2_Y 4
#define nBI_CURRENT_SRC_YUV_HW_DI 4
#define nBI_TEMPORAL_REFERENCE_UV 5
#define nBI_FIELD_COPY_SRC_2_UV 5
#define nBI_TEMPORAL_REFERENCE_U 5
#define nBI_FIELD_COPY_SRC_2_U 5
#define nBI_DENOISED_PREV_HW_DI 5
#define nBI_TEMPORAL_REFERENCE_V 6
#define nBI_FIELD_COPY_SRC_2_V 6
#define nBI_STMM_HISTORY 6
#define nBI_DESTINATION_YUV 7
#define nBI_DESTINATION_RGB 7
#define nBI_DESTINATION_Y 7
#define nBI_DESTINATION_UV 8
#define nBI_DESTINATION_U 8
#define nBI_DESTINATION_V 9
#define nBI_DESTINATION_1_YUV 10
#define nBI_DESTINATION_1_Y 10
#define nBI_DESTINATION_1_UV 11
#define nBI_DESTINATION_1_U 11
#define nBI_DESTINATION_1_V 12
#define nBI_DESTINATION_2_YUV 13
#define nBI_DESTINATION_2_Y 13
#define nBI_DESTINATION_2_UV 14
#define nBI_DESTINATION_2_U 14
#define nBI_DESTINATION_2_V 15
#define nBI_STMM_HISTORY_OUTPUT 20
#define nBI_TEMPORAL_REFERENCE_YUV_PDI 21
#define nBI_TEMPORAL_REFERENCE_Y_PDI 21
#define nBI_TEMPORAL_REFERENCE_UV_PDI 22
#define nBI_TEMPORAL_REFERENCE_U_PDI 22
#define nBI_TEMPORAL_REFERENCE_V_PDI 23
#define nBI_SUBVIDEO_YUV 26
#define nBI_SUBVIDEO_Y 26
#define nBI_SUBVIDEO_UV 27
#define nBI_SUBVIDEO_U 27
#define nBI_SUBVIDEO_V 28
#define nBI_SUBPICTURE_YUV 29
#define nBI_SUBPICTURE_P8 29
#define nBI_SUBPICTURE_A8 30
#define nBI_GRAPHIC_YUV 31
#define nBI_GRAPHIC_P8 31
#define nBI_GRAPHIC_A8 32
//========== Planar Sampler State Table Index ==========
#define nSI_SRC_ALPHA 0x000 // Sampler State for Alpha
//Sampler Index for AVS/IEF messages
#define nSI_SRC_Y 0x400 // Sampler State for Y
#define nSI_SRC_U 0x800 // Sampler State for U
#define nSI_SRC_V 0xC00 // Sampler State for V
#define nSI_SRC_UV 0x800 // For NV12 surfaces
#define nSI_SRC_YUV 0x400 // For Packed surfaces
#define nSI_SRC_RGB 0x400 // For ARGB surfaces
//Sampler Index for SIMD16 sampler messages
#define nSI_SRC_SIMD16_Y 0x100 // Sampler State for Y
#define nSI_SRC_SIMD16_U 0x200 // Sampler State for U
#define nSI_SRC_SIMD16_V 0x300 // Sampler State for V
#define nSI_SRC_SIMD16_UV 0x200 // For NV12 surfaces
#define nSI_SRC_SIMD16_YUV 0x100 // For Packed surfaces
#define nSI_SRC_SIMD16_RGB 0x100 // For ARGB surfaces
// Common Registers
#define pCF_Y_OFFSET a0.4 // Address register holding Y offset
#define pCF_U_OFFSET a0.5 // Address register holding U offset
#define pCF_V_OFFSET a0.6 // Address register holding V offset
// #define YUV_ORI ORIX // Used by writing packed data to dport
//================= Message Payload Header fields ==============================
#define IDP r0.2:ud // Interface Descriptor Pointer
//================= Common Message Descriptor TBD add common load and save =====
// Message descriptor for dataport media write
#ifdef GT
// Message Descriptors
// = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
// 1 (header present 1) 0 0 1010 (media block write) 00000
// 00000000 (binding table index - set later)
// = 0x02094000
#define nDPMW_MSGDSC 0x02094000
#define nDPMR_MSGDSC 0x02098000 // Data Port Media Block Read Message Descriptor
// TBD
#else // ILK
// Message Descriptors
// = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
// 1 (header present 1) 000 0 010 (media block write) 0000
// 00000000 (binding table index - set later)
// = 0x02082000
#define nDPMW_MSGDSC 0x02082000 // Data Port Media Block Write Message Descriptor
#define nDPMR_MSGDSC 0x0208A000 // Data Port Media Block Read Message Descriptor
#endif
// Message Length defines
#define nMSGLEN_1 0x02000000 // Message Length of 1 GRF for Send
#define nMSGLEN_2 0x04000000 // Message Length of 2 GRF for Send
#define nMSGLEN_4 0x08000000 // Message Length of 4 GRF for Send
#define nMSGLEN_8 0x10000000 // Message Length of 8 GRF for Send
// Response Length defines
#define nRESLEN_1 0x00100000 // Message Response Length of 1 GRF from Send
#define nRESLEN_2 0x00200000 // Message Response Length of 2 GRF from Send
#define nRESLEN_3 0x00300000 // Message Response Length of 3 GRF from Send
#define nRESLEN_4 0x00400000 // Message Response Length of 4 GRF from Send
#define nRESLEN_5 0x00500000 // Message Response Length of 5 GRF from Send
#define nRESLEN_8 0x00800000 // Message Response Length of 8 GRF from Send
#define nRESLEN_9 0x00900000 // Message Response Length of 9 GRF from Send
#define nRESLEN_11 0x00B00000 // Message Response Length of 11 GRF from Send
#define nRESLEN_12 0x00C00000 // Message Response Length of 12 GRF from Send
#define nRESLEN_16 0x01000000 // Message Response Length of 16 GRF from Send
// Block Width and Height Size defines
#define nBLOCK_WIDTH_4 0x00000003 // Block Width 4
#define nBLOCK_WIDTH_5 0x00000004 // Block Width 5
#define nBLOCK_WIDTH_8 0x00000007 // Block Width 8
#define nBLOCK_WIDTH_9 0x00000008 // Block Width 9
#define nBLOCK_WIDTH_12 0x0000000B // Block Width 12
#define nBLOCK_WIDTH_16 0x0000000F // Block Width 16
#define nBLOCK_WIDTH_20 0x00000013 // Block Width 20
#define nBLOCK_WIDTH_32 0x0000001F // Block Width 32
#define nBLOCK_HEIGHT_1 0x00000000 // Block Height 1
#define nBLOCK_HEIGHT_2 0x00010000 // Block Height 2
#define nBLOCK_HEIGHT_4 0x00030000 // Block Height 4
#define nBLOCK_HEIGHT_5 0x00040000 // Block Height 5
#define nBLOCK_HEIGHT_8 0x00070000 // Block Height 8
// Extended Message Descriptors
#define nEXTENDED_MATH 0x1
#define nSMPL_ENGINE 0x2
#define nMESSAGE_GATEWAY 0x3
#define nDATAPORT_READ 0x4
#define nDATAPORT_WRITE 0x5
#define nURB 0x6
#define nTS_EOT 0x27 // with End-Of-Thread bit ON
// Common message descriptors:
#ifdef GT
#define nEOT_MSGDSC 0x02000010 // End of Thread Message Descriptor
#define IF_NULL null:uw null:uw null:uw //for different if instructions on ILK and Gen6
#else //ILK
#define nEOT_MSGDSC 0x02000000 // End of Thread Message Descriptor
#define IF_NULL
#endif
//===================== Math Function Control ===================================
#define mfcINV 0x1 // reciprocal
#define mfcLOG 0x2 // log
#define mfcEXP 0x3 // exponent
#define mfcSQRT 0x4 // square root
#define mfcRSQ 0x5 // reciprocal square root
#define mfcSIN 0x6 // sine (in radians)
#define mfcCOS 0x7 // cosine (in radians)
#define mfcSINCOS 0x8 // dst0 = sin of src0, dst1 = cosine of src0 (in radians) - GT+ ONLY
#define mfcPOW 0xA // abs(src0) raised to the src1 power
#define mfcINT_DIV_QR 0xB // return quotient and remainder
#define mfcINT_DIV_Q 0xC // return quotient
#define mfcINT_DIV_R 0xD // return remainder
//=================== Message related registers =================================
#ifdef GT
#define udDUMMY_NULL
#else // _ILK
#define udDUMMY_NULL null:ud // Used in send inst as src0
#endif
//----------- Message Registers ------------
#define mMSGHDR m1 // Message Payload Header
#define mMSGHDRY m1 // Message Payload Header register for Y data
#define mMSGHDRU m2 // Message Payload Header register for U data
#define mMSGHDRV m3 // Message Payload Header register for V data
#define mMSGHDRYA m4 // Second Message Payload Header register for Y data
#define mMSGHDRH m5 // Message Payload Header register for motion history
#define mMSGHDRY1 m1 // Message Payload Header register for first Y data
#define mMSGHDRY2 m2 // Message Payload Header register for second Y data
#define mMSGHDRY3 m3 // Message Payload Header register for third Y data
#define mMSGHDRY4 m4 // Message Payload Header register for fourth Y data
#define mMSGHDRY5 m5 // Message Payload Header register for fifth Y data
#define mMSGHDRY6 m6 // Message Payload Header register for sixth Y data
#define mMSGHDR_EOT m15 // Dummy Message Register for EOT
#define rMSGSRC r8 // Message source register
#define pMSGDSC a0.0:ud // Message Descriptor register (type DWORD)
#define udMH_ORI rMSGSRC.0 // Data Port Media Block R/W message header block offset
#define udMH_ORIX rMSGSRC.0 // Data Port Media Block R/W message header X offset
#define udMH_ORIY rMSGSRC.1 // Data Port Media Block R/W message header Y offset
#define udMH_SIZE rMSGSRC.2 // Data Port Media Block R/W message header block width & height
// M2 - M9 for message data payload
// Typed views of the message data payload registers, all based at m2.
// Each view only differs in element size, source region and element type.
.declare mubMSGPAYLOAD  Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // byte view
.declare muwMSGPAYLOAD  Base=m2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw    // word view
.declare mudMSGPAYLOAD  Base=m2 ElementSize=4 SrcRegion=REGION(8,1)  Type=ud    // dword view
.declare mfMSGPAYLOAD   Base=m2 ElementSize=4 SrcRegion=REGION(8,1)  Type=f     // float view
//=================== End of thread instruction ===========================
// END_THREAD expands to two instructions: copy r0 into the EOT message
// register, then issue the send with the nTS_EOT extended descriptor.
// The "\n\" sequence keeps both instructions inside a single macro body.
#ifdef GT
#define END_THREAD      mov (8) mMSGHDR_EOT<1>:ud r0.0<8;8,1>:ud \n\
                        send (1) null<1>:d mMSGHDR_EOT nTS_EOT nEOT_MSGDSC
#else // ILK This should be changed to 1 instruction; I have tested it and it works - vK
#define END_THREAD      mov (8) mMSGHDR_EOT<1>:ud r0.0<8;8,1>:ud \n\
                        send (1) dNULLREG mMSGHDR_EOT udDUMMY_NULL nTS_EOT nEOT_MSGDSC:ud
#endif
//=======================================================================
// Region declarations for SRC and DEST as TOP and BOT
// Common I/O regions
#define nREGION_1 1
#define nREGION_2 2
//*** These region base GRFs are fixed regardless planar/packed, and data alignment.
//*** Each kernel is responsible to select the correct region declaration below.
//*** YUV regions are not necessarily next to each other.
#define nTOP_Y 10 // r10 - r17 (8 GRFs)
#define nTOP_U 18 // r18 - r25 (8 GRFs)
#define nTOP_V 26 // r26 - r33 (8 GRFs)
#define nBOT_Y 56 // r56 - r63 (8 GRFs)
#define nBOT_U 64 // r64 - r71 (8 GRFs)
#define nBOT_V 72 // r72 - r79 (8 GRFs)
// Define temp space for any usages
#define nTEMP0 34
#define nTEMP1 35
#define nTEMP2 36
#define nTEMP3 37
#define nTEMP4 38
#define nTEMP5 39
#define nTEMP6 40
#define nTEMP7 41
#define nTEMP8 42
#define nTEMP10 44
#define nTEMP12 46
#define nTEMP14 48
#define nTEMP16 50
#define nTEMP17 51
#define nTEMP18 52
#define nTEMP24 58
// Common region 1
// Region 1 ("TOP") register views. Base GRFs come from nTOP_Y / nTOP_U / nTOP_V.

// Byte views, unit-stride source region
.declare ubTOP_Y     Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub
.declare ubTOP_U     Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,1)  DstRegion=<1> Type=ub
.declare ubTOP_V     Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,1)  DstRegion=<1> Type=ub

// Word views
.declare uwTOP_Y     Base=REG(r,nTOP_Y) ElementSize=2 SrcRegion=REGION(16,1) DstRegion=<1> Type=uw
.declare uwTOP_U     Base=REG(r,nTOP_U) ElementSize=2 SrcRegion=REGION(8,1)  DstRegion=<1> Type=uw
.declare uwTOP_V     Base=REG(r,nTOP_V) ElementSize=2 SrcRegion=REGION(8,1)  DstRegion=<1> Type=uw

// Byte views, source region stride 2
.declare ub2TOP_Y    Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(16,2) DstRegion=<1> Type=ub
.declare ub2TOP_U    Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,2)  DstRegion=<1> Type=ub
.declare ub2TOP_V    Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,2)  DstRegion=<1> Type=ub

// Byte views, source region stride 4 (no destination region declared)
.declare ub4TOP_Y    Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
.declare ub4TOP_U    Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
.declare ub4TOP_V    Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,4) Type=ub

// ARGB byte view; shares the nTOP_Y base
.declare ubTOP_ARGB  Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub

// Used by "send" instruction
.declare udTOP_Y_IO  Base=REG(r,nTOP_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udTOP_U_IO  Base=REG(r,nTOP_U) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udTOP_V_IO  Base=REG(r,nTOP_V) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
// Common region 2
// Region 2 ("BOT") register views. Base GRFs come from nBOT_Y / nBOT_U / nBOT_V.

// Byte views, unit-stride source region
.declare ubBOT_Y     Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub
.declare ubBOT_U     Base=REG(r,nBOT_U) ElementSize=1 SrcRegion=REGION(8,1)  DstRegion=<1> Type=ub
.declare ubBOT_V     Base=REG(r,nBOT_V) ElementSize=1 SrcRegion=REGION(8,1)  DstRegion=<1> Type=ub

// Word views
.declare uwBOT_Y     Base=REG(r,nBOT_Y) ElementSize=2 SrcRegion=REGION(16,1) DstRegion=<1> Type=uw
.declare uwBOT_U     Base=REG(r,nBOT_U) ElementSize=2 SrcRegion=REGION(8,1)  DstRegion=<1> Type=uw
.declare uwBOT_V     Base=REG(r,nBOT_V) ElementSize=2 SrcRegion=REGION(8,1)  DstRegion=<1> Type=uw

// Byte views, source region stride 2
.declare ub2BOT_Y    Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(16,2) DstRegion=<1> Type=ub
.declare ub2BOT_U    Base=REG(r,nBOT_U) ElementSize=1 SrcRegion=REGION(8,2)  DstRegion=<1> Type=ub
.declare ub2BOT_V    Base=REG(r,nBOT_V) ElementSize=1 SrcRegion=REGION(8,2)  DstRegion=<1> Type=ub

// ARGB byte view; shares the nBOT_Y base
.declare ubBOT_ARGB  Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub

// Used by "send" instruction
.declare udBOT_Y_IO  Base=REG(r,nBOT_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udBOT_U_IO  Base=REG(r,nBOT_U) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udBOT_V_IO  Base=REG(r,nBOT_V) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
// End of common.inc
#endif // COMMON_INC
i965_drv_video/shaders/post_processing/Common/readSampler16x1.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Module name: readSampler16x1.asm
//
// Read one row of pix through sampler
//

//#define SAMPLER_MSG_DSC 0x166A0000 // ILK Sampler Message Descriptor

// Send Message [DevILK] Message Descriptor
//  MBZ MsgL=5 MsgR=8 H MBZ SI MD MsgType SmplrIndx BindTab
//  000 0 101 0 1000  1 0      10 0000    0000      00000000
//  0      A     8    A           0       0         0  0

// MsgL=1+2*2(u,v)=5 MsgR=8

#define SAMPLER_MSG_DSC 0x0A8A0000 // ILK Sampler Message Descriptor

    // Assume MSGSRC is set already in the caller
    //mov (8) rMSGSRC.0<1>:ud 0:ud // Unused fields

    // Read 16 sampled pixels and stored them in float32 in 8 GRFs
    // 422 data is expanded to 444, return 8 GRF in the order of RGB- (UYV-).
    // 420 data has three surfaces, return 8 GRF. Valid is always in the 1st GRF when in R8. Make sure no overwrite the following 3 GRFs.
    // alpha data is expanded to 4444, return 8 GRF in the order of RGBA (UYVA).

    // Copy the caller-prepared message source into the header register, then
    // send to extended function 0x2. SAMPLER_IDX and BINDING_IDX are expected
    // to be defined by the including file.
    mov (16)    mMSGHDR<1>:uw       rMSGSRC<16;16,1>:uw
    send (16)   DATABUF(0)<1>       mMSGHDR     udDUMMY_NULL    0x2     SAMPLER_MSG_DSC+SAMPLER_IDX+BINDING_IDX:ud
i965_drv_video/shaders/post_processing/Common/undefall.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: undefall.inc
//
// Undefine all global SRC/DEST symbols so that the file including this one
// can define them again for a new process.
//
// Source definitions
#undef ubSRC_Y
#undef ubSRC_U
#undef ubSRC_V
#undef ub2SRC_Y
#undef ub2SRC_U
#undef ub2SRC_V
#undef ub4SRC_Y
#undef ub4SRC_U
#undef ub4SRC_V
#undef uwSRC_Y
#undef uwSRC_U
#undef uwSRC_V
#undef udSRC_Y
#undef udSRC_U
#undef udSRC_V
#undef udSRC_YUV
#undef nSRC_YUV_REG
// Destination definitions
#undef ubDEST_Y
#undef ubDEST_U
#undef ubDEST_V
#undef ub2DEST_Y
#undef ub2DEST_U
#undef ub2DEST_V
#undef ub4DEST_Y
#undef ub4DEST_U
#undef ub4DEST_V
#undef uwDEST_Y
#undef uwDEST_U
#undef uwDEST_V
#undef udDEST_Y
#undef udDEST_U
#undef udDEST_V
#undef udDEST_YUV
#undef nDEST_YUV_REG
#undef ubDEST_ARGB
// End of undefall.inc
i965_drv_video/shaders/post_processing/Core_Kernels/AVS_IEF.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: AVS_IEF.inc
#ifndef _AVS_INF_INC_
#define _AVS_INF_INC_
#include "undefall.inc" //Undefine the SRC and DEST symbols
// Message Header
// m0.7 31:0 Debug
// m0.6 31:0 Debug
// m0.5 31:0 Ignored
// m0.4 31:0 Ignored
// m0.3 31:0 Ignored
// m0.2 31:16 Ignored
// 15 Alpha Write Channel Mask enable=0, disable=1
// 14 Blue Write Channel Mask (V)
// 13 Green Write Channel Mask (Y)
// 12 Red Write Channel Mask (U)
// 11:0 Ignored
// m0.1 Ignored
// m0.0 Ignored
#define mAVS_8x8_HDR m0 // Message Header
#define mAVS_PAYLOAD m1 // Message Payload Header
#define mAVS_8x8_HDR_2 m2 // Message Header
#define mAVS_PAYLOAD_2 m3 // Message Payload Header
#define mAVS_8x8_HDR_UV m2 // Message Header
#define mAVS_PAYLOAD_UV m3 // Message Payload Header
#define rAVS_8x8_HDR rMSGSRC // Mirror of Message Header
#define rAVS_PAYLOAD r9 // Mirror of Message Payload Header
// AVS payload
// m1.7 Ignored
// m1.6 Pixel 0 V Address ---> ORIY (Y0)
// m1.5 Delta V ---> Step Y
// m1.4 Ignored
// m1.3 Ignored
// m1.2 Pixel 0 U Address ---> ORIX (X0)
// m1.1 Delta U ---> Step X
// m1.0 U 2nd Derivative ---> NLAS dx
// Sampler Message Descriptor
// 31:29 Reserved 000
// 28:25 Message length 0010
// 24:20 Response length xxxxx ---> 4GRFs for each enabled channel
// 19 Header Present 1
// 18 MBZ 0
// 17:16 SIMD Mode 11 ---> SIMD64
// 15:12 Message Type 1011 ---> sample_8x8
// 11:8 Sampler Index xxxx
// 7:0 Binding Table Index xxxxxxxx
#define nAVS_MSG_DSC_1CH 0x044BB000
#define nAVS_MSG_DSC_2CH 0x048BB000
#define nAVS_MSG_DSC_3CH 0x04CBB000
#define nAVS_MSG_DSC_4CH 0x050BB000
#define nAVS_RED_CHANNEL_ONLY 0x0000E000 // Enable Red channel only
#define nAVS_GREEN_CHANNEL_ONLY 0x0000D000 // Enable Green channel only
#define nAVS_RED_BLUE_CHANNELS 0x0000A000 // Enable Red and Blue channels
#define nAVS_RGB_CHANNELS 0x00008000 // Enable RGB(YUV) channels
#define nAVS_ALL_CHANNELS 0x00000000 // Enable all channels (ARGB\AYUV)
// Byte and word views of the two AVS response areas.
// The first area is based at GRF nTEMP8, the second at GRF nTEMP24.
.declare ubAVS_RESPONSE    Base=REG(r,nTEMP8)  ElementSize=1 SrcRegion=REGION(16,1) Type=ub
.declare uwAVS_RESPONSE    Base=REG(r,nTEMP8)  ElementSize=2 SrcRegion=REGION(16,1) Type=uw

.declare ubAVS_RESPONSE_2  Base=REG(r,nTEMP24) ElementSize=1 SrcRegion=REGION(16,1) Type=ub
.declare uwAVS_RESPONSE_2  Base=REG(r,nTEMP24) ElementSize=2 SrcRegion=REGION(16,1) Type=uw
// Select the destination register views from the region chosen by the
// includer via nSRC_REGION, then re-define nSRC_REGION to the region that
// was actually selected (any value other than nREGION_2 ends up as nREGION_1).
#if (nSRC_REGION==nREGION_2)
// Region 2: destination symbols alias the BOT register views
#define uwDEST_Y uwBOT_Y
#define uwDEST_U uwBOT_U
#define uwDEST_V uwBOT_V
#define ubDEST_Y ubBOT_Y
#undef nSRC_REGION
#define nSRC_REGION nREGION_2
#else //(nSRC_REGION==nREGION_1)
// Default: destination symbols alias the TOP register views
#define uwDEST_Y uwTOP_Y
#define uwDEST_U uwTOP_U
#define uwDEST_V uwTOP_V
#define ubDEST_Y ubTOP_Y
#undef nSRC_REGION
#define nSRC_REGION nREGION_1
#endif
#endif //_AVS_INF_INC_
i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//------------------------------------------------------------------------------
// AVS_SetupFirstBlock.asm
//
// Fills rAVS_PAYLOAD (the GRF mirror of the message payload header) for the
// 1st block of Media Sampler 8x8.
//------------------------------------------------------------------------------

    // Setup Message Header (instruction is disabled)
//  mov (8)     mAVS_8x8_HDR<1>:ud      rMSGSRC<8;8,1>:ud

    // Check NLAS Enable bit: f0.0 is set when (uwNLAS_ENABLE & BIT15) is zero,
    // and the predicated mov then clears the step delta.
    and.z.f0.0 (1)  wNULLREG            uwNLAS_ENABLE:uw        BIT15:uw
    (f0.0) mov (1)  fVIDEO_STEP_DELTA:f 0.0:f

    // Setup Message Payload Header for 1st block of Media Sampler 8x8
    mov (1)     rAVS_PAYLOAD.0:f        fVIDEO_STEP_DELTA:f             // NLAS dx
    mov (1)     rAVS_PAYLOAD.1:f        fVIDEO_STEP_X:f                 // Step X
    mov (1)     rAVS_PAYLOAD.5:f        fVIDEO_STEP_Y:f                 // Step Y
    mov (2)     rAVS_PAYLOAD.2<4>:f     fSRC_VID_H_ORI<2;2,1>:f         // Orig X and Y (written to .2 and .6)
i965_drv_video/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//------------------------------------------------------------------------------
// AVS_SetupSecondBlock.asm
//------------------------------------------------------------------------------

    // NLAS calculations for 2nd block of Media Sampler 8x8:
    //    X(i)  = X0 + dx*i + ddx*i*(i-1)/2   ==>  X(8)  = X0 + dx*8 + ddx*28
    //    dx(i) = dx(0) + ddx*i               ==>  dx(8) = dx + ddx*8
    // Here X0 is fSRC_VID_H_ORI, dx is fVIDEO_STEP_X and ddx is fVIDEO_STEP_DELTA.

    // Calculating X(8): accumulate X0 + dx*8 in acc0.2, then add ddx*28
    mov (1)     acc0.2<1>:f             fSRC_VID_H_ORI:f
    mac (1)     acc0.2<1>:f             fVIDEO_STEP_X:f         8.0:f
    mac (1)     rAVS_PAYLOAD.2:f        fVIDEO_STEP_DELTA:f     28.0:f

    // Calculating dx(8): seed acc0.1 with dx, then add ddx*8
    mov (1)     acc0.1<1>:f             fVIDEO_STEP_X:f
    mac (1)     rAVS_PAYLOAD.1:f        fVIDEO_STEP_DELTA:f     8.0:f
i965_drv_video/shaders/post_processing/Core_Kernels/DI.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: DI.inc
#ifdef GT
// GT DI Kernel
#else // ILK
// ILK DI Kernel
#endif
//---------------------------------------------------------------------------
// Binding table indices
//---------------------------------------------------------------------------
#define nBIDX_DI_PRV 10 // Previous DI-ed frame
#define nBIDX_DI_CUR 13 // Current DI-ed frame
#define nBIDX_DN 7 // Denoised frame
#define nBIDX_STAT 20 // Statistics
#define nBIDX_DI_Source 4 // Source Surface
//---------------------------------------------------------------------------
// Message descriptors
//---------------------------------------------------------------------------
// Extended message descriptor
#define nSMPL_ENGINE 0x2
#define nDATAPORT_WRITE 0x5
#define nTS_EOT 0x27 // with End-Of-Thread bit ON
// Message descriptor for end-of-thread
// = 000 0001 (message len) 00000 (resp len)
// 0 (header present 0) 00000000000000 0 (URB dereferenced) 0000
#define nEOT_MSGDSC 0x02000000
// Message descriptor for sampler read
// = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
// 1 (header present 1) 0 11 (SIMD32/64 mode)
// 1000 (message type) 0000 (DI state index)
// 00000000 (binding table index - set later)
// = 0x040b8000
// comment begin
// The following is commented out because of walker feature
// It corresponds to the #ifdef GT #else and #endif
//#define nSMPL_MSGDSC 0x040b8000
//#define nSMPL_RESP_LEN_DI 0x00c00000 // 12
//#define nSMPL_RESP_LEN_NODI_PL 0x00500000 // 5
//#define nSMPL_RESP_LEN_NODI_PA 0x00900000 // 9
//#define nSMPL_RESP_LEN_NODN 0x00900000 // 9
//#define nSMPL_RESP_LEN_PDI 0x00b00000 // 11
// comment end
#ifdef GT
#define nSMPL_MSGDSC 0x040b8000
#define nSMPL_RESP_LEN_DI 0x00c00000 // 12
#define nSMPL_RESP_LEN_NODI_PL 0x00500000 // 5 //DI disable, the XY stored in 5th GRF, no impact to return length
#define nSMPL_RESP_LEN_NODI_PA 0x00900000 // 9 //DI disable, the XY stored in 5th GRF, no impact to return length
#define nSMPL_RESP_LEN_NODN 0x00a00000 // 10 //NO DN, originally use 9, now we need use 10 to store the XY with walker
#define nSMPL_RESP_LEN_PDI 0x00b00000 // 11
#else
#define nSMPL_MSGDSC 0x040b8000
#define nSMPL_RESP_LEN_DI 0x00c00000 // 12
#define nSMPL_RESP_LEN_NODI_PL 0x00500000 // 5
#define nSMPL_RESP_LEN_NODI_PA 0x00900000 // 9
#define nSMPL_RESP_LEN_NODN 0x00900000 // 9
#define nSMPL_RESP_LEN_PDI 0x00b00000 // 11
#endif
// Message descriptor for dataport media write
#ifdef GT
// = 000 0000 (message len - set later) 00000 (resp len 0)
// 1 (header present 1) 0 0 1010 (media block write) 00000
// 00000000 (binding table index - set later)
// = 0x00094000
#define nDPMW_MSGDSC 0x00094000
#else // ILK
// = 000 0000 (message len - set later) 00000 (resp len 0)
// 1 (header present 1) 000 0 010 (media block write) 0000
// 00000000 (binding table index - set later)
// = 0x00082000
#define nDPMW_MSGDSC 0x00082000
#endif
#define nDPMW_MSG_LEN_STMM 0x04000000 // 2 - STMM
#define nDPMW_MSG_LEN_DH 0x04000000 // 2 - Denoise history
#define nDPMW_MSG_LEN_PA_DN 0x0a000000 // 5 - Denoised output
#define nDPMW_MSG_LEN_PA_NODI 0x12000000 // 9 - Denoised output - denoise only - DI disabled
#define nDPMW_MSG_LEN_PL_DN 0x06000000 // 3 - Denoised output
#define nDPMW_MSG_LEN_PL_NODI 0x0a000000 // 5 - Denoised output - denoise only - DI disabled
#define nDPMW_MSG_LEN_DI 0x0a000000 // 5 - DI output
//---------------------------------------------------------------------------
// Static and inline parameters
//---------------------------------------------------------------------------
// Static parameters
// Static parameters (all located in r1)
.declare ubTFLD_FIRST    Base=r1.27 ElementSize=1 Type=ub   // top field first
.declare ubSRCYUVOFFSET  Base=r1.4  ElementSize=1 Type=ub   // source packed format
.declare ubDSTYUVOFFSET  Base=r1.8  ElementSize=1 Type=ub   // destination packed format
.declare uwSPITCH_DIV2   Base=r1.10 ElementSize=2 Type=uw   // statistics surface pitch divided by 2

// Inline parameters (located in r5)
.declare uwXORIGIN       Base=r5.0  ElementSize=2 Type=uw   // X and Y origin
.declare uwYORIGIN       Base=r5.1  ElementSize=2 Type=uw
//---------------------------------------------------------------------------
// Kernel GRF variables
//---------------------------------------------------------------------------
// Message response (Denoised & DI-ed pixels & statistics)
// Typed views over the sampler response GRFs. Several names alias the same
// base register (e.g. r8, r12, r17) and are distinguished only by element
// type or by the DI-enabled / DI-disabled mode they are used in.
.declare dRESP                   Base=r8  ElementSize=4 Type=d    // Response message (12 or 5 or 11)
.declare ubRESP                  Base=r8  ElementSize=1 Type=ub

.declare dSTMM                   Base=r16 ElementSize=4 Type=d    // STMM
.declare ubDN_HIST_NODI          Base=r12 ElementSize=1 Type=ub   // Denoise history data (DI disabled)
.declare ubDN_HIST_DI            Base=r17 ElementSize=1 Type=ub   // Denoise history data (DI enabled)
.declare uwRETURNED_POSITION_DI  Base=r17 ElementSize=2 Type=uw   // XY_Return_Data (DI enabled)
.declare uwRETURNED_POSITION_DN  Base=r12 ElementSize=2 Type=uw   // XY_Return_Data (DI disabled)

.declare ub1ST_FLD_DN            Base=r12 ElementSize=1 Type=ub   // 1st field Denoised data (DI enabled)
.declare d1ST_FLD_DN             Base=r12 ElementSize=4 Type=d
.declare ub2ND_FLD_DN            Base=r18 ElementSize=1 Type=ub   // 2nd field Denoised data (DI enabled)
.declare d2ND_FLD_DN             Base=r18 ElementSize=4 Type=d

.declare ubPRV_DI                Base=r8  ElementSize=1 Type=ub   // Previous frame DI (DI enabled)
.declare ubCUR_DI                Base=r12 ElementSize=1 Type=ub   // Current frame DI (DI enabled)

// Packed denoised output
.declare ubDN_YUV                Base=r22 ElementSize=1 Type=ub   // Denoised YUV422
.declare dDN_YUV                 Base=r22 ElementSize=4 Type=d
#define npDN_YUV 704 // = 22*32 = 0x2C0

// Packed DI output
.declare dDI_YUV_PRV             Base=r32 ElementSize=4 Type=d    // Previous frame DI output
.declare dDI_YUV_CUR             Base=r36 ElementSize=4 Type=d    // Current frame DI output
#define npDI_YUV 1024 // = 32*32 = 0x400

// For packed output
#define p422_YOFFSET a0.2
#define p422_UOFFSET a0.3
#define p422_VOFFSET a0.4
#define pDN_TFLDSRC a0.6
#define pDN_BFLDSRC a0.7
#define npRESP 192 // = 6*32

// Message source (three typed views of r70)
.declare udMSGSRC                Base=r70 ElementSize=4 Type=ud
.declare uwMSGSRC                Base=r70 ElementSize=2 Type=uw
.declare dMSGSRC                 Base=r70 ElementSize=4 Type=d
//---------------------------------------------------------------------------
// Kernel MRF variables
//---------------------------------------------------------------------------
// Message register (MRF) layout: one #define per message header plus the
// typed views used to fill it.
#define mMSGHDR_SMPL m1 // Sampler response: m1~m2
.declare mudMSGHDR_SMPL  Base=m1  ElementSize=4 Type=ud
.declare muwMSGHDR_SMPL  Base=m1  ElementSize=2 Type=uw

#define mMSGHDR_DN m3 // Denoise output: m3~m7 for PA, m3~m5 for PL
.declare mdMSGHDR_DN     Base=m3  ElementSize=4 Type=d

#define mMSGHDR_STAT m8 // Statistics output: m8~m9
.declare mdMSGHDR_STAT   Base=m8  ElementSize=4 Type=d
.declare mubMSGHDR_STAT  Base=m8  ElementSize=1 Type=ub

#define mMSGHDR_DI m10 // DI output: m10~m14
.declare mdMSGHDR_DI     Base=m10 ElementSize=4 Type=d

#define mMSGHDR_EOT m15 // EOT
#ifdef GT
#define MSGSRC
#else
#define MSGSRC null:ud
#endif
//---------------------------------------------------------------------------
// End of thread instruction
//---------------------------------------------------------------------------
#ifdef GT
#define END_THREAD send (8) null<1>:d mMSGHDR_EOT nTS_EOT nEOT_MSGDSC
#else // ILK
#define END_THREAD send (8) null<1>:d mMSGHDR_EOT null:ud nTS_EOT nEOT_MSGDSC
#endif
// end of DI.inc
i965_drv_video/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Write denoise history to memory
    // Build the media block write header in rMSGSRC
    shr (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w              2:w     NODDCLR             // X,Y origin / 4
    add (1)     rMSGSRC.0<1>:ud         rMSGSRC.0<0;1,0>:ud         uwSPITCH_DIV2<0;1,0>:uw     NODDCLR_NODDCHK     // Add pitch to X origin
    mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_HIST:ud     NODDCHK                     // block width and height (4x2)

    // Copy header and history data into the message registers, then write
    mov (8)     mMSGHDR_HIST<1>:ud      rMSGSRC.0<8;8,1>:ud                                     // message header
    mov (1)     mudMSGHDR_HIST(1)<1>    udRESP(nDI_HIST_OFFSET,0)<0;1,0>                        // Move denoise history to MRF
    send (8)    dNULLREG    mMSGHDR_HIST    udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_HIST+nBI_STMM_HISTORY_OUTPUT:ud
i965_drv_video/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Pack the two frames held in the response registers into the destination
// YUV block, then write the previous and the current frame with two data
// port writes.

    // Build the media block write header in rMSGSRC
    shl (1)     rMSGSRC.0<1>:ud     wORIX<0;1,0>:w          1:w     NODDCLR             // H. block origin need to be doubled
    mov (1)     rMSGSRC.1<1>:ud     wORIY<0;1,0>:w          NODDCLR_NODDCHK             // Block origin
    mov (1)     rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_DI:ud   NODDCHK                     // Block width and height (32x8)

    add (4)     pCF_Y_OFFSET<1>:uw  ubDEST_CF_OFFSET<4;4,1>:ub      nDEST_YUV_REG*nGRFWIB:w     // Initial Y,U,V offset in YUV422 block

    // NOTE(review): the "1st field" loops below index with `%1+4*nGRFWIB`.
    // Confirm the $for preprocessor evaluates `%1+4` before the multiply;
    // with ordinary precedence this is %1 + (4*nGRFWIB).

    // Pack 2nd field Y
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (16)    r[pCF_Y_OFFSET, %1*nGRFWIB]<2>      ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
    }
    // Pack 1st field Y
    $for(0; <nY_NUM_OF_ROWS; 1) {
        mov (16)    r[pCF_Y_OFFSET, %1+4*nGRFWIB]<2>    ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
    }
    // Pack 2nd field U
    $for(0; <nUV_NUM_OF_ROWS; 1) {
        mov (8)     r[pCF_U_OFFSET, %1*nGRFWIB]<4>      ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels
    }
    // Pack 1st field U
    $for(0; <nUV_NUM_OF_ROWS; 1) {
        mov (8)     r[pCF_U_OFFSET, %1+4*nGRFWIB]<4>    ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels
    }
    // Pack 2nd field V
    $for(0; <nUV_NUM_OF_ROWS; 1) {
        mov (8)     r[pCF_V_OFFSET, %1*nGRFWIB]<4>      ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //Vpixels
    }
    // Pack 1st field V
    $for(0; <nUV_NUM_OF_ROWS; 1) {
        mov (8)     r[pCF_V_OFFSET, %1+4*nGRFWIB]<4>    ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //Vpixels
    }

    // save the previous frame
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    $for(0; <4; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_YUV(%1)REGION(8,1)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_DI+nBI_DESTINATION_1_YUV:ud

    // save the current frame
    mov (8)     mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud
    $for(0; <4; 1) {
        mov (8)     mudMSGPAYLOAD(%1)<1>    udDEST_YUV(%1+4)REGION(8,1)
    }
    send (8)    dNULLREG    mMSGHDR     udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPW_MSG_SIZE_DI+nBI_DESTINATION_2_YUV:ud
i965_drv_video/shaders/post_processing/Core_Kernels/DNDI.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// Module name: DNDI.inc
#ifdef GT
// GT DI Kernel
#else // ILK
// ILK DI Kernel
#endif
#include "undefall.inc"
//---------------------------------------------------------------------------
// Message descriptors
//---------------------------------------------------------------------------
// Extended message descriptor
// Message descriptor for sampler read (bit fields listed from the most significant bit down):
// // = 000 0010 (message len 2) 00000 (resp len - set later: 12, 5, 9 or 11, see nSMPL_RESP_LEN_* below)
// // 1 (header present 1) 0 11 (SIMD32/64 mode)
// // 1000 (message type) 0000 (DI state index)
// // 00000000 (binding table index - set later)
// // = 0x040b8000
// The response length and binding table index fields are zero here; callers add them
// in (see DNDI_COMMAND.asm: nSMPL_DI_MSGDSC+nSMPL_RESP_LEN+nBI_CURRENT_SRC_YUV_HW_DI).
#define nSMPL_DI_MSGDSC 0x040b8000
#define nSMPL_RESP_LEN_DNDI nRESLEN_12 // 12 - for DN + DI Alg
#define nSMPL_RESP_LEN_DN_PL nRESLEN_5 // 5 - for DN Planar Alg
#define nSMPL_RESP_LEN_DN_PA nRESLEN_9 // 9 - for DN Packed Alg
#define nSMPL_RESP_LEN_DI nRESLEN_9 // 9 - for DI Only Alg
#define nSMPL_RESP_LEN_PDI nRESLEN_11 // 11 - for Partial DI Alg
// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
#define nDPMW_MSG_LEN_STMM nMSGLEN_1 // 1 - For STMM Save
#define nDPMW_MSG_LEN_HIST nMSGLEN_1 // 1 - For Denoise History Save
#define nDPMW_MSG_LEN_PA_DN_DI nMSGLEN_4 // 4 - For DN Curr Save
#define nDPMW_MSG_LEN_PA_DN_NODI nMSGLEN_8 // 8 - For DN Curr Save (denoise only - DI disabled)
#define nDPMW_MSG_LEN_PL_DN_DI nMSGLEN_2 // 2 - For DN Curr Save
#define nDPMW_MSG_LEN_PL_DN_NODI nMSGLEN_4 // 4 - For DN Curr Save (denoise only - DI disabled)
#define nDPW_BLOCK_SIZE_STMM nBLOCK_WIDTH_8+nBLOCK_HEIGHT_4 // Y block size 8x4
// The DI block/message sizes are #undef'd first so that any definition made by a
// previously included file is replaced rather than redefined.
#undef nDPW_BLOCK_SIZE_DI
#undef nDPW_MSG_SIZE_DI
#define nDPW_BLOCK_SIZE_DI nBLOCK_WIDTH_32+nBLOCK_HEIGHT_4
#define nDPW_MSG_SIZE_DI nMSGLEN_4
//---------------------------------------------------------------------------
// Kernel GRF variables
//---------------------------------------------------------------------------
// Defines for DI enabled
// Register offsets into the sampler response (used as the register index of
// udRESP/ubRESP). The highest offset is 11, matching the 12-GRF response
// noted at the nRESP define further down.
#define nDI_PREV_FRAME_LUMA_OFFSET 0
#define nDI_PREV_FRAME_CHROMA_OFFSET 2
#define nDI_CURR_FRAME_LUMA_OFFSET 4
#define nDI_CURR_FRAME_CHROMA_OFFSET 6
#define nDI_STMM_OFFSET 8
#define nDI_HIST_OFFSET 9
#define nDI_CURR_2ND_FIELD_LUMA_OFFSET 10
#define nDI_CURR_2ND_FIELD_CHROMA_OFFSET 11
// Defines for DI disabled
#define nNODI_LUMA_OFFSET 0
#define nNODI_HIST_OFFSET 4
#define nNODI_CHROMA_OFFSET 5
// nHIST_OFFSET is only defined when DI_ENABLE or DI_DISABLE is set by the including
// kernel. NOTE(review): if neither is defined nHIST_OFFSET stays undefined, and if
// both are defined it is defined twice - presumably exactly one is expected; confirm.
#ifdef DI_ENABLE
#define nHIST_OFFSET nDI_HIST_OFFSET
#undef nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8 // Number of Y rows per block (4 rows for each frame)
#undef nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block
#endif
#ifdef DI_DISABLE
#define nHIST_OFFSET nNODI_HIST_OFFSET
#endif
// Select which register region (TOP or BOT) holds the source pixels and which one
// receives the destination / denoised output, based on nSRC_REGION.
// When the source is region 2 the SRC/DEST aliases map to the BOT registers and the
// YUV output and DN packing space map to TOP; otherwise the mapping is reversed.
#if (nSRC_REGION==nREGION_2)
#define ub2SRC_Y ub2BOT_Y
#define ub2SRC_U ub2BOT_U
#define ub2SRC_V ub2BOT_V
#define uwDEST_Y uwBOT_Y
#define uwDEST_U uwBOT_U
#define uwDEST_V uwBOT_V
#define nDEST_YUV_REG nTOP_Y
#define udDEST_YUV udTOP_Y_IO
#define nRESP nTEMP0 // DI return message requires 12 GRFs
#define nDN_YUV nTOP_Y // Space for Packing DN for next run requires 8 GRFs
// NOTE(review): this re-defines nSRC_REGION to the value that was just tested
// (nREGION_2), so it does not change the selection - confirm that is intended.
#undef nSRC_REGION
#define nSRC_REGION nREGION_2
#else
#define ub2SRC_Y ub2TOP_Y
#define ub2SRC_U ub2TOP_U
#define ub2SRC_V ub2TOP_V
#define uwDEST_Y uwTOP_Y
#define uwDEST_U uwTOP_U
#define uwDEST_V uwTOP_V
#define nDEST_YUV_REG nBOT_Y
#define udDEST_YUV udBOT_Y_IO
#define nRESP nTEMP0 // DI return message requires 12 GRFs
#define nDN_YUV nBOT_Y // Space for Packing DN for next run requires 8 GRFs
#undef nSRC_REGION
#define nSRC_REGION nREGION_1 // REGION_1 will be the source region for first kernel
#endif
// Message response (Denoised & DI-ed pixels & statistics)
// udRESP and ubRESP are a dword view and a byte view of the same registers,
// both based at GRF nRESP.
.declare udRESP Base=REG(r,nRESP) ElementSize=4 SrcRegion=REGION(8,1) DstRegion=<1> Type=ud
.declare ubRESP Base=REG(r,nRESP) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub
// For Denoised Curr Output (used as the previous frame in the next run)
// ubDN_YUV and udDN_YUV are byte and dword views of the same registers based at GRF nDN_YUV.
.declare ubDN_YUV Base=REG(r,nDN_YUV) ElementSize=1 Type=ub
.declare udDN_YUV Base=REG(r,nDN_YUV) ElementSize=4 Type=ud
// Presumably the byte address of GRF nDN_YUV (register number times GRF width in
// bytes) - TODO confirm the meaning of nGRFWIB where it is defined.
#define npDN_YUV nDN_YUV*nGRFWIB
// For DI Process Output (1st and 2nd Frames Output)
//.declare udDI_YUV_PRIV Base=REG(r,nTEMP0) ElementSize=4 Type=ud // Previous frame DI output
//.declare udDI_YUV_CURR Base=REG(r,nTEMP0) ElementSize=4 Type=ud // Current frame DI output
//#define npDI_YUV nTEMP0*nGRFWIB
//---------------------------------------------------------------------------
// Kernel MRF variables
//---------------------------------------------------------------------------
// Note: mMSG_SMPL and mMSGHDR_DN (and mMSGHDR_DI_1ST below) all start at m1, so
// these messages share the same MRF registers.
#define mMSG_SMPL m1 // Sampler Command is in: m1~m2
// Dword and word views of the sampler message registers.
.declare mudMSG_SMPL Base=mMSG_SMPL ElementSize=4 Type=ud
.declare muwMSG_SMPL Base=mMSG_SMPL ElementSize=2 Type=uw
#define mMSGHDR_DN m1 // Denoise Output: m1~m9 for PA, m3~m5 for PL
// Dword and byte views of the denoise output message registers.
.declare mudMSGHDR_DN Base=mMSGHDR_DN ElementSize=4 Type=ud
.declare mubMSGHDR_DN Base=mMSGHDR_DN ElementSize=1 Type=ub
#define mMSGHDR_STMM m11 // STMM Output: m11~m12
.declare mudMSGHDR_STMM Base=mMSGHDR_STMM ElementSize=4 Type=ud
#define mMSGHDR_HIST m13 // HIST Output: m13~m14
// Dword view of the denoise-history message registers (header in m13, data in m14).
// ElementSize is 4 to match Type=ud, like every other ud declaration in this file;
// the previous value of 1 was inconsistent with the dword type. The use in
// DNDI_Hist_Save.asm addresses mudMSGHDR_HIST(1), i.e. sub-register 0, so it is
// unaffected by the element size.
.declare mudMSGHDR_HIST Base=mMSGHDR_HIST ElementSize=4 Type=ud
// The two DI output messages use adjacent MRF ranges so both can be
// assembled before either is sent.
#define mMSGHDR_DI_1ST m1 // DI output: m1~m5
.declare mudMSGHDR_DI_1ST Base=mMSGHDR_DI_1ST ElementSize=4 Type=ud
#define mMSGHDR_DI_2ND m6 // DI output: m6~m10
.declare mudMSGHDR_DI_2ND Base=mMSGHDR_DI_2ND ElementSize=4 Type=ud
// end of DNDI.inc
i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Activate the DNDI send command
// Builds the two-register sampler message (header copied from rMSGSRC, then the
// X/Y origin written into the second message register) and sends it to the sampler;
// the response is written starting at udRESP(0).
// nSMPL_RESP_LEN is not defined in DNDI.inc - presumably the including kernel maps
// it to one of the nSMPL_RESP_LEN_* values; confirm.
mov (8) mudMSG_SMPL(0)<1> rMSGSRC.0<8;8,1>:ud NODDCLR // message header
mov (1) muwMSG_SMPL(1,4)<1> wORIX<0;1,0>:w NODDCLR_NODDCHK// horizontal origin
// NOTE(review): this is the last write before the send and still carries NODDCLR;
// DNDI_Hist_Save.asm ends its equivalent sequence with plain NODDCHK - confirm the
// dependency-control hints here are intended.
mov (1) muwMSG_SMPL(1,12)<1> wORIY<0;1,0>:w NODDCLR_NODDCHK // vertical origin
//mov (2) muwMSG_SMPL(1,4)<2> wORIX<2;2,1>:w NODDCHK// problem during compile !! when using this line
send (8) udRESP(0)<1> mMSG_SMPL udDUMMY_NULL nSMPL_ENGINE nSMPL_DI_MSGDSC+nSMPL_RESP_LEN+nBI_CURRENT_SRC_YUV_HW_DI:ud
i965_drv_video/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Write denoise history to memory
// Steps: derive the history-surface origin from the block origin, set the block
// size, copy the header to the MRF, copy two dwords of history from the sampler
// response into the payload register, and issue the data-port write.
// Note: this sequence modifies rMSGSRC.0, rMSGSRC.1 and rMSGSRC.2 in place.
shr (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w 2:w NODDCLR // X,Y origin / 4
add (1) rMSGSRC.0<1>:ud rMSGSRC.0<0;1,0>:ud uwSPITCH_DIV2<0;1,0>:uw NODDCLR_NODDCHK// Add pitch to X origin
mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_HIST:ud NODDCHK // block width and height (4x2)
mov (8) mMSGHDR_HIST<1>:ud rMSGSRC.0<8;8,1>:ud // message header
// NOTE(review): the history is always read from nNODI_HIST_OFFSET, although DNDI.inc
// defines nHIST_OFFSET to pick the DI-enabled or DI-disabled offset - confirm this
// file is only used by DI-disabled kernels.
mov (2) mudMSGHDR_HIST(1)<1> udRESP(nNODI_HIST_OFFSET,0)<2;2,1> // Move denoise history to MRF
send (8) dNULLREG mMSGHDR_HIST udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_HIST+nBI_STMM_HISTORY_OUTPUT:ud
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PA_AVS_IEF_16x8.asm ----------
// Wrapper that assembles the packed-YUV AVS/IEF load from shared pieces:
// common definitions, the two sampler reads, then the 16x8 unpack.
#include "AVS_IEF.inc"
//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 YUV packed
//------------------------------------------------------------------------------
#include "PA_AVS_IEF_Sample.asm"
//------------------------------------------------------------------------------
// Unpacking sampler reads to 4:4:4 internal planar
//------------------------------------------------------------------------------
#include "PA_AVS_IEF_Unpack_16x8.asm"
//------------------------------------------------------------------------------
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PA_AVS_IEF_8x4.asm ----------
// Wrapper that assembles the packed-YUV AVS/IEF load from shared pieces:
// common definitions, the two sampler reads, then the 8x4 unpack.
#include "AVS_IEF.inc"
//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 YUV packed
//------------------------------------------------------------------------------
#include "PA_AVS_IEF_Sample.asm"
//------------------------------------------------------------------------------
// Unpacking sampler data to 4:2:0 internal planar
//------------------------------------------------------------------------------
#include "PA_AVS_IEF_Unpack_8x4.asm"
//------------------------------------------------------------------------------
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PA_AVS_IEF_8x8.asm ----------
// Wrapper that assembles the packed-YUV AVS/IEF load from shared pieces:
// common definitions, the two sampler reads, then the 8x8 unpack.
#include "AVS_IEF.inc"
//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 YUV packed
//------------------------------------------------------------------------------
#include "PA_AVS_IEF_Sample.asm"
//------------------------------------------------------------------------------
// Unpacking sampler data to 4:2:2 internal planar
//------------------------------------------------------------------------------
#include "PA_AVS_IEF_Unpack_8x8.asm"
//------------------------------------------------------------------------------
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PA_AVS_IEF_Sample.asm ----------
//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 YUV packed
//------------------------------------------------------------------------------
// Two sampler messages are issued, one per 8x8 block. Both are built from the same
// GRF header (rAVS_8x8_HDR), which the included setup files prepare; the responses
// land in uwAVS_RESPONSE and uwAVS_RESPONSE_2 respectively.
// 1st 8x8 setup
#include "AVS_SetupFirstBlock.asm"
// Enable RGB(YUV) channels
// Written once here; the second message reuses rAVS_8x8_HDR, so this assumes
// AVS_SetupSecondBlock.asm leaves rAVS_8x8_HDR.2 untouched - TODO confirm.
mov (1) rAVS_8x8_HDR.2:ud nAVS_RGB_CHANNELS:ud
mov (16) mAVS_8x8_HDR.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs
send (1) uwAVS_RESPONSE(0)<1> mAVS_8x8_HDR udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_3CH+nSI_SRC_YUV+nBI_CURRENT_SRC_YUV
// Return YUV in 12 GRFs
// 2nd 8x8 setup
#include "AVS_SetupSecondBlock.asm"
mov (16) mAVS_8x8_HDR_2.0:ud rAVS_8x8_HDR.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs
send (1) uwAVS_RESPONSE_2(0)<1> mAVS_8x8_HDR_2 udDUMMY_NULL nSMPL_ENGINE nAVS_MSG_DSC_3CH+nSI_SRC_YUV+nBI_CURRENT_SRC_YUV
// Return YUV in 12 GRFs
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//----------
PA_AVS_IEF_Unpack_16x8.asm
----------
#
ifdef
AVS_OUTPUT_16_BIT
// Output is packed in AVYU format
// Each destination register receives four packed pixels of four words each
// (destination stride <4>): word 0 = U, word 1 = Y, word 2 = V, word 3 = A (set to 0).
// The first 8x8 block (uwAVS_RESPONSE) fills destination registers 0,1,4,5,8,9,...
// and the second block (uwAVS_RESPONSE_2) fills registers 2,3,6,7,10,11,...
// Source registers per channel: V = 0,1,6,7; Y = 2,3,8,9; U = 4,5,10,11.

// Move first 8x8 words of Y to dest GRF (as packed)
mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(2,0)<4;4,1>
mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(2,8)<4;4,1>
mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(2,4)<4;4,1>
mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(2,12)<4;4,1>
mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(3,0)<4;4,1>
mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(3,8)<4;4,1>
mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(3,4)<4;4,1>
mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(3,12)<4;4,1>
mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(8,0)<4;4,1>
mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(8,8)<4;4,1>
mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(8,4)<4;4,1>
mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(8,12)<4;4,1>
mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(9,0)<4;4,1>
mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(9,8)<4;4,1>
mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(9,4)<4;4,1>
mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(9,12)<4;4,1>

// Move first 8x8 words of U to dest GRF (as packed)
mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(4,0)<4;4,1>
mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(4,8)<4;4,1>
mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(4,4)<4;4,1>
mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(4,12)<4;4,1>
mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(5,0)<4;4,1>
mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(5,8)<4;4,1>
mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(5,4)<4;4,1>
mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(5,12)<4;4,1>
mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(10,0)<4;4,1>
mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(10,8)<4;4,1>
mov (4) uwDEST_Y(20,0)<4> uwAVS_RESPONSE(10,4)<4;4,1>
mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(10,12)<4;4,1>
mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(11,0)<4;4,1>
mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(11,8)<4;4,1>
mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(11,4)<4;4,1>
mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(11,12)<4;4,1>

// Move first 8x8 words of V to dest GRF (as packed)
mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(0,0)<4;4,1>
mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(0,8)<4;4,1>
mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(0,4)<4;4,1>
mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(0,12)<4;4,1>
mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(1,0)<4;4,1>
mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(1,8)<4;4,1>
mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(1,4)<4;4,1>
mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(1,12)<4;4,1>
mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(6,0)<4;4,1>
mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(6,8)<4;4,1>
mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(6,4)<4;4,1>
mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(6,12)<4;4,1>
mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(7,0)<4;4,1>
mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(7,8)<4;4,1>
mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(7,4)<4;4,1>
mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(7,12)<4;4,1>

// Move first 8x8 words of A to dest GRF (as packed)
// No alpha comes back from the sampler here; the A word of every pixel is zeroed.
mov (4) uwDEST_Y(0,3)<4> 0:uw
mov (4) uwDEST_Y(1,3)<4> 0:uw
mov (4) uwDEST_Y(4,3)<4> 0:uw
mov (4) uwDEST_Y(5,3)<4> 0:uw
mov (4) uwDEST_Y(8,3)<4> 0:uw
mov (4) uwDEST_Y(9,3)<4> 0:uw
mov (4) uwDEST_Y(12,3)<4> 0:uw
mov (4) uwDEST_Y(13,3)<4> 0:uw
mov (4) uwDEST_Y(16,3)<4> 0:uw
mov (4) uwDEST_Y(17,3)<4> 0:uw
mov (4) uwDEST_Y(20,3)<4> 0:uw
mov (4) uwDEST_Y(21,3)<4> 0:uw
mov (4) uwDEST_Y(24,3)<4> 0:uw
mov (4) uwDEST_Y(25,3)<4> 0:uw
mov (4) uwDEST_Y(28,3)<4> 0:uw
mov (4) uwDEST_Y(29,3)<4> 0:uw

// Move second 8x8 words of Y to dest GRF
mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1>
mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1>
mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1>
mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1>
mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1>
mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1>
mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1>
mov (4) uwDEST_Y(15,1)<4> uwAVS_RESPONSE_2(3,12)<4;4,1>
mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(8,0)<4;4,1>
mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(8,8)<4;4,1>
mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(8,4)<4;4,1>
mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(8,12)<4;4,1>
mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(9,0)<4;4,1>
mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(9,8)<4;4,1>
mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(9,4)<4;4,1>
mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(9,12)<4;4,1>

// Move second 8x8 words of U to dest GRF
mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(4,0)<4;4,1>
mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(4,8)<4;4,1>
mov (4) uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(4,4)<4;4,1>
mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(4,12)<4;4,1>
mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(5,0)<4;4,1>
mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(5,8)<4;4,1>
mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(5,4)<4;4,1>
mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(5,12)<4;4,1>
mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(10,0)<4;4,1>
mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(10,8)<4;4,1>
mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(10,4)<4;4,1>
mov (4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(10,12)<4;4,1>
mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(11,0)<4;4,1>
mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(11,8)<4;4,1>
mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(11,4)<4;4,1>
mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(11,12)<4;4,1>

// Move second 8x8 words of V to dest GRF
mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(0,0)<4;4,1>
mov (4) uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(0,8)<4;4,1>
mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(0,4)<4;4,1>
mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(0,12)<4;4,1>
mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(1,0)<4;4,1>
mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(1,8)<4;4,1>
mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(1,4)<4;4,1>
mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(1,12)<4;4,1>
mov (4) uwDEST_Y(18,2)<4> uwAVS_RESPONSE_2(6,0)<4;4,1>
mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(6,8)<4;4,1>
mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(6,4)<4;4,1>
mov (4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(6,12)<4;4,1>
mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(7,0)<4;4,1>
mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(7,8)<4;4,1>
mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(7,4)<4;4,1>
mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(7,12)<4;4,1>

// Move second 8x8 words of A to dest GRF
mov (4) uwDEST_Y(2,3)<4> 0:uw
mov (4) uwDEST_Y(3,3)<4> 0:uw
mov (4) uwDEST_Y(6,3)<4> 0:uw
mov (4) uwDEST_Y(7,3)<4> 0:uw
mov (4) uwDEST_Y(10,3)<4> 0:uw
mov (4) uwDEST_Y(11,3)<4> 0:uw
mov (4) uwDEST_Y(14,3)<4> 0:uw
mov (4) uwDEST_Y(15,3)<4> 0:uw
mov (4) uwDEST_Y(18,3)<4> 0:uw
mov (4) uwDEST_Y(19,3)<4> 0:uw
mov (4) uwDEST_Y(22,3)<4> 0:uw
mov (4) uwDEST_Y(23,3)<4> 0:uw
mov (4) uwDEST_Y(26,3)<4> 0:uw
mov (4) uwDEST_Y(27,3)<4> 0:uw
mov (4) uwDEST_Y(30,3)<4> 0:uw
mov (4) uwDEST_Y(31,3)<4> 0:uw
/*
This
section
will
be
used
if
16
-
bit
output
is
needed
in
planar
format
-
vK
//
Move
first
8
x8
word
s
of
Y
to
dest
GRF
mov
(
8
)
uwDEST_Y
(
0
)
<
1
>
uwAVS_RESPONSE
(
2
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
1
)
<
1
>
uwAVS_RESPONSE
(
2
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
2
)
<
1
>
uwAVS_RESPONSE
(
3
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
3
)
<
1
>
uwAVS_RESPONSE
(
3
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
4
)
<
1
>
uwAVS_RESPONSE
(
8
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
5
)
<
1
>
uwAVS_RESPONSE
(
8
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
6
)
<
1
>
uwAVS_RESPONSE
(
9
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
7
)
<
1
>
uwAVS_RESPONSE
(
9
,
8
)
<
8
;4,1>
//
Move
first
8
x8
word
s
of
V
to
dest
GRF
mov
(
8
)
uwDEST_V
(
0
)
<
1
>
ubAVS_RESPONSE
(
0
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
1
)
<
1
>
ubAVS_RESPONSE
(
0
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
2
)
<
1
>
ubAVS_RESPONSE
(
1
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
3
)
<
1
>
ubAVS_RESPONSE
(
1
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
4
)
<
1
>
ubAVS_RESPONSE
(
6
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
5
)
<
1
>
ubAVS_RESPONSE
(
6
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
6
)
<
1
>
ubAVS_RESPONSE
(
7
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
7
)
<
1
>
ubAVS_RESPONSE
(
7
,
8
)
<
8
;4,1>
//
Move
first
8
x8
word
s
of
U
to
dest
GRF
mov
(
8
)
uwDEST_U
(
0
)
<
1
>
ubAVS_RESPONSE
(
4
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
1
)
<
1
>
ubAVS_RESPONSE
(
4
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
2
)
<
1
>
ubAVS_RESPONSE
(
5
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
3
)
<
1
>
ubAVS_RESPONSE
(
5
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
4
)
<
1
>
ubAVS_RESPONSE
(
10
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
5
)
<
1
>
ubAVS_RESPONSE
(
10
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
6
)
<
1
>
ubAVS_RESPONSE
(
11
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
7
)
<
1
>
ubAVS_RESPONSE
(
11
,
8
)
<
8
;4,1>
//
Move
second
8
x8
word
s
of
Y
to
dest
GRF
mov
(
8
)
uwDEST_Y
(
0
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
2
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
1
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
2
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
2
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
3
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
3
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
3
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
4
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
8
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
5
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
8
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
6
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
9
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
7
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
9
,
8
)
<
8
;4,1>
//
Move
second
8
x8
word
s
of
V
to
dest
GRF
mov
(
8
)
uwDEST_V
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
0
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
0
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
1
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
1
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
6
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
6
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
7
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
7
,
8
)
<
8
;4,1>
//
Move
second
8
x8
word
s
of
U
to
dest
GRF
mov
(
8
)
uwDEST_U
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
4
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
4
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
5
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
5
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
10
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
10
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
11
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
11
,
8
)
<
8
;4,1>
*/
#
else
/*
OUTPUT_8_BIT
*/
// 8-bit output path: each sampler response channel is copied into its own planar
// destination (uwDEST_Y / uwDEST_V / uwDEST_U), one 16-word register per row.
// The first 8x8 block fills words 0-7 of each destination register and the second
// block fills words 8-15 (destination sub-register offset 8).
// Sources are read through the byte view at offset 1 (or 8+1) with horizontal
// stride 2; assuming ubAVS_RESPONSE aliases the 16-bit samples of uwAVS_RESPONSE,
// this picks the upper byte of each sample - TODO confirm against its declaration.

// Move first 8x8 words of Y to dest GRF
mov (8) uwDEST_Y(0)<1> ubAVS_RESPONSE(2,1)<16;4,2>
mov (8) uwDEST_Y(1)<1> ubAVS_RESPONSE(2,8+1)<16;4,2>
mov (8) uwDEST_Y(2)<1> ubAVS_RESPONSE(3,1)<16;4,2>
mov (8) uwDEST_Y(3)<1> ubAVS_RESPONSE(3,8+1)<16;4,2>
mov (8) uwDEST_Y(4)<1> ubAVS_RESPONSE(8,1)<16;4,2>
mov (8) uwDEST_Y(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2>
mov (8) uwDEST_Y(6)<1> ubAVS_RESPONSE(9,1)<16;4,2>
mov (8) uwDEST_Y(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2>

// Move first 8x8 words of V to dest GRF
mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(0,1)<16;4,2>
mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(0,8+1)<16;4,2>
mov (8) uwDEST_V(2)<1> ubAVS_RESPONSE(1,1)<16;4,2>
mov (8) uwDEST_V(3)<1> ubAVS_RESPONSE(1,8+1)<16;4,2>
mov (8) uwDEST_V(4)<1> ubAVS_RESPONSE(6,1)<16;4,2>
mov (8) uwDEST_V(5)<1> ubAVS_RESPONSE(6,8+1)<16;4,2>
mov (8) uwDEST_V(6)<1> ubAVS_RESPONSE(7,1)<16;4,2>
mov (8) uwDEST_V(7)<1> ubAVS_RESPONSE(7,8+1)<16;4,2>

// Move first 8x8 words of U to dest GRF
mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;4,2>
mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(4,8+1)<16;4,2>
mov (8) uwDEST_U(2)<1> ubAVS_RESPONSE(5,1)<16;4,2>
mov (8) uwDEST_U(3)<1> ubAVS_RESPONSE(5,8+1)<16;4,2>
mov (8) uwDEST_U(4)<1> ubAVS_RESPONSE(10,1)<16;4,2>
mov (8) uwDEST_U(5)<1> ubAVS_RESPONSE(10,8+1)<16;4,2>
mov (8) uwDEST_U(6)<1> ubAVS_RESPONSE(11,1)<16;4,2>
mov (8) uwDEST_U(7)<1> ubAVS_RESPONSE(11,8+1)<16;4,2>

// Move second 8x8 words of Y to dest GRF
mov (8) uwDEST_Y(0,8)<1> ubAVS_RESPONSE_2(2,1)<16;4,2>
mov (8) uwDEST_Y(1,8)<1> ubAVS_RESPONSE_2(2,8+1)<16;4,2>
mov (8) uwDEST_Y(2,8)<1> ubAVS_RESPONSE_2(3,1)<16;4,2>
mov (8) uwDEST_Y(3,8)<1> ubAVS_RESPONSE_2(3,8+1)<16;4,2>
mov (8) uwDEST_Y(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2>
mov (8) uwDEST_Y(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2>
mov (8) uwDEST_Y(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2>
mov (8) uwDEST_Y(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2>

// Move second 8x8 words of V to dest GRF
mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE_2(0,1)<16;4,2>
mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE_2(0,8+1)<16;4,2>
mov (8) uwDEST_V(2,8)<1> ubAVS_RESPONSE_2(1,1)<16;4,2>
mov (8) uwDEST_V(3,8)<1> ubAVS_RESPONSE_2(1,8+1)<16;4,2>
mov (8) uwDEST_V(4,8)<1> ubAVS_RESPONSE_2(6,1)<16;4,2>
mov (8) uwDEST_V(5,8)<1> ubAVS_RESPONSE_2(6,8+1)<16;4,2>
mov (8) uwDEST_V(6,8)<1> ubAVS_RESPONSE_2(7,1)<16;4,2>
mov (8) uwDEST_V(7,8)<1> ubAVS_RESPONSE_2(7,8+1)<16;4,2>

// Move second 8x8 words of U to dest GRF
mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE_2(4,1)<16;4,2>
mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE_2(4,8+1)<16;4,2>
mov (8) uwDEST_U(2,8)<1> ubAVS_RESPONSE_2(5,1)<16;4,2>
mov (8) uwDEST_U(3,8)<1> ubAVS_RESPONSE_2(5,8+1)<16;4,2>
mov (8) uwDEST_U(4,8)<1> ubAVS_RESPONSE_2(10,1)<16;4,2>
mov (8) uwDEST_U(5,8)<1> ubAVS_RESPONSE_2(10,8+1)<16;4,2>
mov (8) uwDEST_U(6,8)<1> ubAVS_RESPONSE_2(11,1)<16;4,2>
mov (8) uwDEST_U(7,8)<1> ubAVS_RESPONSE_2(11,8+1)<16;4,2>
#
endif
//------------------------------------------------------------------------------
// Re-define new number of lines
// After this unpack both the Y and the U/V planes hold 8 rows per block; the
// symbols are #undef'd first so earlier definitions are replaced, not redefined.
#undef nUV_NUM_OF_ROWS
#undef nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8
#define nUV_NUM_OF_ROWS 8
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//----------
PA_AVS_IEF_Unpack_8x4.asm
----------
//
Yoni:
In
order
to
optimize
unpacking
,
3
methods
are
being
ch
ecked
:
//
1
.
AVS_ORIGINAL
//
2
.
AVS_ROUND_TO_8_BITS
//
3
.
AVS_INDIRECT_ACCESS
//
//
Only
1
method
should
stay
in
the
code
//#
define
AVS_ROUND_TO_8_BITS
//#
define
AVS_INDIRECT_ACCESS
// Luma is copied at full resolution: one 16-word destination register per row, the
// first 8x8 block in words 0-7 and the second block in words 8-15.
// Chroma is subsampled: each mov (4) with region <16;2,4> takes every other sample
// of two source rows, and only one source position per response register is read,
// so four chroma rows of 8 words are produced, two rows per destination register.
// Sources use the byte view at offset 1; assuming ubAVS_RESPONSE aliases 16-bit
// samples this is the upper byte of each sample - TODO confirm against its declaration.

// Move first 8x8 words of Y to dest GRF
mov (8) uwDEST_Y(0)<1> ubAVS_RESPONSE(2,1)<16;4,2>
mov (8) uwDEST_Y(1)<1> ubAVS_RESPONSE(2,8+1)<16;4,2>
mov (8) uwDEST_Y(2)<1> ubAVS_RESPONSE(3,1)<16;4,2>
mov (8) uwDEST_Y(3)<1> ubAVS_RESPONSE(3,8+1)<16;4,2>
mov (8) uwDEST_Y(4)<1> ubAVS_RESPONSE(8,1)<16;4,2>
mov (8) uwDEST_Y(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2>
mov (8) uwDEST_Y(6)<1> ubAVS_RESPONSE(9,1)<16;4,2>
mov (8) uwDEST_Y(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2>

// Move first 4x8 words of V to dest GRF
mov (4) uwDEST_V(0)<1> ubAVS_RESPONSE(0,1)<16;2,4>
mov (4) uwDEST_V(0,8)<1> ubAVS_RESPONSE(1,1)<16;2,4>
mov (4) uwDEST_V(1)<1> ubAVS_RESPONSE(6,1)<16;2,4>
mov (4) uwDEST_V(1,8)<1> ubAVS_RESPONSE(7,1)<16;2,4>

// Move first 4x8 words of U to dest GRF
mov (4) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;2,4>
mov (4) uwDEST_U(0,8)<1> ubAVS_RESPONSE(5,1)<16;2,4>
mov (4) uwDEST_U(1)<1> ubAVS_RESPONSE(10,1)<16;2,4>
mov (4) uwDEST_U(1,8)<1> ubAVS_RESPONSE(11,1)<16;2,4>

// Move second 8x8 words of Y to dest GRF
mov (8) uwDEST_Y(0,8)<1> ubAVS_RESPONSE_2(2,1)<16;4,2>
mov (8) uwDEST_Y(1,8)<1> ubAVS_RESPONSE_2(2,8+1)<16;4,2>
mov (8) uwDEST_Y(2,8)<1> ubAVS_RESPONSE_2(3,1)<16;4,2>
mov (8) uwDEST_Y(3,8)<1> ubAVS_RESPONSE_2(3,8+1)<16;4,2>
mov (8) uwDEST_Y(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2>
mov (8) uwDEST_Y(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2>
mov (8) uwDEST_Y(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2>
mov (8) uwDEST_Y(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2>

// Move second 4x8 words of V to dest GRF
mov (4) uwDEST_V(0,4)<1> ubAVS_RESPONSE_2(0,1)<16;2,4>
mov (4) uwDEST_V(0,12)<1> ubAVS_RESPONSE_2(1,1)<16;2,4>
mov (4) uwDEST_V(1,4)<1> ubAVS_RESPONSE_2(6,1)<16;2,4>
mov (4) uwDEST_V(1,12)<1> ubAVS_RESPONSE_2(7,1)<16;2,4>

// Move second 4x8 words of U to dest GRF
mov (4) uwDEST_U(0,4)<1> ubAVS_RESPONSE_2(4,1)<16;2,4>
mov (4) uwDEST_U(0,12)<1> ubAVS_RESPONSE_2(5,1)<16;2,4>
mov (4) uwDEST_U(1,4)<1> ubAVS_RESPONSE_2(10,1)<16;2,4>
mov (4) uwDEST_U(1,12)<1> ubAVS_RESPONSE_2(11,1)<16;2,4>
//------------------------------------------------------------------------------
// Re-define new number of lines
// The symbols are #undef'd first so earlier definitions are replaced, not redefined.
// NOTE(review): the unpack code in this file writes only four chroma rows
// (uwDEST_U/uwDEST_V registers 0 and 1, two rows each), yet nUV_NUM_OF_ROWS is set
// to 8 - confirm whether 4 was intended or whether consumers rely on 8.
#undef nUV_NUM_OF_ROWS
#undef nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8
#define nUV_NUM_OF_ROWS 8
i965_drv_video/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PA_AVS_IEF_Unpack_8x8.asm ----------
//
// Unpack of the first sampler response (ubAVS_RESPONSE) into the planar
// uwDEST_Y / uwDEST_V / uwDEST_U destinations.
//
// Luma rows read the response with region <16;4,2>, chroma rows with
// <16;2,4>; every source starts at byte sub-offset 1 (or 8+1 for the second
// half of a response register).
// NOTE(review): presumably this keeps only the upper byte of each 16-bit
// sample -- confirm against the response layout declared in AVS_IEF.inc.
//
// Yoni: three unpacking methods are being evaluated for optimization:
//   1. AVS_ORIGINAL
//   2. AVS_ROUND_TO_8_BITS
//   3. AVS_INDIRECT_ACCESS
//
// Only one of these methods should remain in the code.
//#define AVS_ROUND_TO_8_BITS
//#define AVS_INDIRECT_ACCESS

// ---- Y: first 8x8 words, destination rows 0..7 <- response registers 2, 3, 8, 9
mov (8)     uwDEST_Y(0)<1>      ubAVS_RESPONSE(2,1)<16;4,2>
mov (8)     uwDEST_Y(1)<1>      ubAVS_RESPONSE(2,8+1)<16;4,2>
mov (8)     uwDEST_Y(2)<1>      ubAVS_RESPONSE(3,1)<16;4,2>
mov (8)     uwDEST_Y(3)<1>      ubAVS_RESPONSE(3,8+1)<16;4,2>
mov (8)     uwDEST_Y(4)<1>      ubAVS_RESPONSE(8,1)<16;4,2>
mov (8)     uwDEST_Y(5)<1>      ubAVS_RESPONSE(8,8+1)<16;4,2>
mov (8)     uwDEST_Y(6)<1>      ubAVS_RESPONSE(9,1)<16;4,2>
mov (8)     uwDEST_Y(7)<1>      ubAVS_RESPONSE(9,8+1)<16;4,2>

// ---- V: first 4x8 words <- response registers 0, 1, 6, 7
// Each destination register receives two 4-element groups (sub-offsets 0 and 8).
mov (4)     uwDEST_V(0)<1>      ubAVS_RESPONSE(0,1)<16;2,4>
mov (4)     uwDEST_V(0,8)<1>    ubAVS_RESPONSE(0,8+1)<16;2,4>
mov (4)     uwDEST_V(1)<1>      ubAVS_RESPONSE(1,1)<16;2,4>
mov (4)     uwDEST_V(1,8)<1>    ubAVS_RESPONSE(1,8+1)<16;2,4>
mov (4)     uwDEST_V(2)<1>      ubAVS_RESPONSE(6,1)<16;2,4>
mov (4)     uwDEST_V(2,8)<1>    ubAVS_RESPONSE(6,8+1)<16;2,4>
mov (4)     uwDEST_V(3)<1>      ubAVS_RESPONSE(7,1)<16;2,4>
mov (4)     uwDEST_V(3,8)<1>    ubAVS_RESPONSE(7,8+1)<16;2,4>

// ---- U: first 4x8 words <- response registers 4, 5, 10, 11
mov (4)     uwDEST_U(0)<1>      ubAVS_RESPONSE(4,1)<16;2,4>
mov (4)     uwDEST_U(0,8)<1>    ubAVS_RESPONSE(4,8+1)<16;2,4>
mov (4)     uwDEST_U(1)<1>      ubAVS_RESPONSE(5,1)<16;2,4>
mov (4)     uwDEST_U(1,8)<1>    ubAVS_RESPONSE(5,8+1)<16;2,4>
mov (4)     uwDEST_U(2)<1>      ubAVS_RESPONSE(10,1)<16;2,4>
mov (4)     uwDEST_U(2,8)<1>    ubAVS_RESPONSE(10,8+1)<16;2,4>
mov (4)     uwDEST_U(3)<1>      ubAVS_RESPONSE(11,1)<16;2,4>
mov (4)     uwDEST_U(3,8)<1>    ubAVS_RESPONSE(11,8+1)<16;2,4>
// Unpack of the second sampler response (ubAVS_RESPONSE_2).
// Same source registers and regions as the first response; only the
// destination sub-offsets differ so the data lands next to the first block.

// ---- Y: second 8x8 words -> upper half (sub-offset 8) of destination rows 0..7
mov (8)     uwDEST_Y(0,8)<1>    ubAVS_RESPONSE_2(2,1)<16;4,2>
mov (8)     uwDEST_Y(1,8)<1>    ubAVS_RESPONSE_2(2,8+1)<16;4,2>
mov (8)     uwDEST_Y(2,8)<1>    ubAVS_RESPONSE_2(3,1)<16;4,2>
mov (8)     uwDEST_Y(3,8)<1>    ubAVS_RESPONSE_2(3,8+1)<16;4,2>
mov (8)     uwDEST_Y(4,8)<1>    ubAVS_RESPONSE_2(8,1)<16;4,2>
mov (8)     uwDEST_Y(5,8)<1>    ubAVS_RESPONSE_2(8,8+1)<16;4,2>
mov (8)     uwDEST_Y(6,8)<1>    ubAVS_RESPONSE_2(9,1)<16;4,2>
mov (8)     uwDEST_Y(7,8)<1>    ubAVS_RESPONSE_2(9,8+1)<16;4,2>

// ---- V: second 4x8 words -> sub-offsets 4 and 12 of destination registers 0..3
mov (4)     uwDEST_V(0,4)<1>    ubAVS_RESPONSE_2(0,1)<16;2,4>
mov (4)     uwDEST_V(0,12)<1>   ubAVS_RESPONSE_2(0,8+1)<16;2,4>
mov (4)     uwDEST_V(1,4)<1>    ubAVS_RESPONSE_2(1,1)<16;2,4>
mov (4)     uwDEST_V(1,12)<1>   ubAVS_RESPONSE_2(1,8+1)<16;2,4>
mov (4)     uwDEST_V(2,4)<1>    ubAVS_RESPONSE_2(6,1)<16;2,4>
mov (4)     uwDEST_V(2,12)<1>   ubAVS_RESPONSE_2(6,8+1)<16;2,4>
mov (4)     uwDEST_V(3,4)<1>    ubAVS_RESPONSE_2(7,1)<16;2,4>
mov (4)     uwDEST_V(3,12)<1>   ubAVS_RESPONSE_2(7,8+1)<16;2,4>

// ---- U: second 4x8 words -> sub-offsets 4 and 12 of destination registers 0..3
mov (4)     uwDEST_U(0,4)<1>    ubAVS_RESPONSE_2(4,1)<16;2,4>
mov (4)     uwDEST_U(0,12)<1>   ubAVS_RESPONSE_2(4,8+1)<16;2,4>
mov (4)     uwDEST_U(1,4)<1>    ubAVS_RESPONSE_2(5,1)<16;2,4>
mov (4)     uwDEST_U(1,12)<1>   ubAVS_RESPONSE_2(5,8+1)<16;2,4>
mov (4)     uwDEST_U(2,4)<1>    ubAVS_RESPONSE_2(10,1)<16;2,4>
mov (4)     uwDEST_U(2,12)<1>   ubAVS_RESPONSE_2(10,8+1)<16;2,4>
mov (4)     uwDEST_U(3,4)<1>    ubAVS_RESPONSE_2(11,1)<16;2,4>
mov (4)     uwDEST_U(3,12)<1>   ubAVS_RESPONSE_2(11,8+1)<16;2,4>
//------------------------------------------------------------------------------
// After the unpack, both the luma and the chroma planes are declared to hold
// 8 rows; any previous row counts are dropped first.
#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS      8
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS     8
i965_drv_video/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Build-time configuration for the DN + DI kernel on packed (PA) input.
// DI_ENABLE is defined before DNDI.inc is pulled in.
#define DI_ENABLE

#include "DNDI.inc"

// Sampler response length: the previous value is always discarded, then the
// DI-only or the full DN+DI length is selected.
#undef  nSMPL_RESP_LEN
#ifdef DI_ONLY
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DI       // number of GRFs (DI only)
#else
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI     // number of GRFs (DN + DI)
#endif

#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // HIST block size for write is 4x1
#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_32+nBLOCK_HEIGHT_4     // DN block size for write is 32x4
////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
#include "DNDI_Command.asm"

////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// The whole rearrangement below is disabled (commented out); it is retained
// for reference only and emits no instructions.
//
//// previous frame, Y component -> internal planar format
//$for (0; <nY_NUM_OF_ROWS/2; 1) {
//    mov (16) uwDEST_Y(%1,0)<1>      ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
//}
//// previous frame, U and V components -> internal planar format
//$for (0; <nUV_NUM_OF_ROWS/2; 1) {
//    mov (8) uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    //U pixels
//    mov (8) uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      //V pixels
//}
//// current frame, Y component -> internal planar format
//$for (0; <nY_NUM_OF_ROWS/2; 1) {
//    mov (16) uwDEST_Y(%1+4,0)<1>    ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
//}
//// current frame, U and V components -> internal planar format
//$for (0; <nUV_NUM_OF_ROWS/2; 1) {
//    mov (8) uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    //U pixels
//    mov (8) uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      //V pixels
//}
////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
// Writes the STMM part of the sampler response back to memory through the
// data port. The three scalar header writes are chained with
// NODDCLR / NODDCLR_NODDCHK / NODDCHK and must stay in this order.
shr  (1)    rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     NODDCLR             // X origin / 2
mov  (1)    rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  NODDCLR_NODDCHK     // Y origin
mov  (1)    rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         NODDCHK             // block width and height (8x4)

mov  (8)    mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
mov  (8)    mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // STMM payload -> MRF

send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud
////////////////////////////////////// Save the History Data for Next Run /////////////////////////
// Everything up to the #endif is assembled only when DI_ONLY is NOT defined.
#ifdef DI_ONLY
#else

#include "DI_Hist_Save.asm"

////////////////////////////////////// Pack and Save the DN Curr Frame for Next Run ///////////////
// f0.0 is set when ubTFLD_FIRST equals 1; it selects the field interleave order below.
cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub      1:w

// Four plane offsets = ubSRC_CF_OFFSET[0..3] + npDN_YUV, stored from pCF_Y_OFFSET onwards.
add  (4)    pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub   npDN_YUV:uw

// Destination position / block size of the DN write (header written before the branch).
shl  (1)    rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     NODDCLR             // X origin * 2
mov  (1)    rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  NODDCLR_NODDCHK     // Y origin
mov  (1)    rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           NODDCHK             // block width and height (32x4 per the #define at the top of this file)

mov  (8)    mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud

(f0.0) jmpi (1) TOP_FIELD_FIRST

// NOTE(review): in the loops below, the row offset "%1+1*32" addresses rows
// 1 and 3 only if the $for pre-processor evaluates it as (%1+1)*32; with
// conventional operator precedence it would be %1+32 -- confirm.

BOTTOM_FIELD_FIRST:
    // Disabled fused variant of the three loops that follow (same six moves in one loop):
    //$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    //    mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub     ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16)                // 2nd field luma (line 0,2)
    //    mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub   ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,16)                    // 1st field luma (line 1,3)
    //    mov (8)  r[pCF_U_OFFSET, %1*32]<4>:ub     ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2>    // 2nd field U (line 0,2)
    //    mov (8)  r[pCF_V_OFFSET, %1*32]<4>:ub     ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2>      // 2nd field V (line 0,2)
    //    mov (8)  r[pCF_U_OFFSET, %1+1*32]<4>:ub   ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16+1)<16;8,2>        // 1st field U (line 1,3)
    //    mov (8)  r[pCF_V_OFFSET, %1+1*32]<4>:ub   ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16)<16;8,2>          // 1st field V (line 1,3)
    //}

    // Luma: lines 0,2 from the 2nd field, lines 1,3 from the 1st field
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub       ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16)                // 2nd field luma from current frame (line 0,2)
        mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,16)                    // 1st field luma from current frame (line 1,3)
    }

    // U: same interleave as luma
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (8)  r[pCF_U_OFFSET, %1*32]<4>:ub       ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2>    // 2nd field U from current frame (line 0,2)
        mov (8)  r[pCF_U_OFFSET, %1+1*32]<4>:ub     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16+1)<16;8,2>        // 1st field U from current frame (line 1,3)
    }

    // V: same interleave as luma
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (8)  r[pCF_V_OFFSET, %1*32]<4>:ub       ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2>      // 2nd field V from current frame (line 0,2)
        mov (8)  r[pCF_V_OFFSET, %1+1*32]<4>:ub     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,16)<16;8,2>          // 1st field V from current frame (line 1,3)
    }

    jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
    // Disabled fused variant of the three loops that follow (same six moves in one loop):
    //$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    //    mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)                     // 1st field luma (line 0,2)
    //    mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub   ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16)                // 2nd field luma (line 1,3)
    //    mov (8)  r[pCF_U_OFFSET, %1*32]<4>:ub     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,1)<16;8,2>           // 1st field U (line 0,2)
    //    mov (8)  r[pCF_V_OFFSET, %1*32]<4>:ub     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,0)<16;8,2>           // 1st field V (line 0,2)
    //    mov (8)  r[pCF_U_OFFSET, %1+1*32]<4>:ub   ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2>    // 2nd field U (line 1,3)
    //    mov (8)  r[pCF_V_OFFSET, %1+1*32]<4>:ub   ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2>      // 2nd field V (line 1,3)
    //}

    // Luma: lines 0,2 from the 1st field, lines 1,3 from the 2nd field
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (16) r[pCF_Y_OFFSET, %1*32]<2>:ub       ubRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)                     // 1st field luma from current frame (line 0,2)
        mov (16) r[pCF_Y_OFFSET, %1+1*32]<2>:ub     ubRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*16)                // 2nd field luma from current frame (line 1,3)
    }

    // U: same interleave as luma
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (8)  r[pCF_U_OFFSET, %1*32]<4>:ub       ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,1)<16;8,2>           // 1st field U from current frame (line 0,2)
        mov (8)  r[pCF_U_OFFSET, %1+1*32]<4>:ub     ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16+1)<16;8,2>    // 2nd field U from current frame (line 1,3)
    }

    // V: same interleave as luma
    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
        mov (8)  r[pCF_V_OFFSET, %1*32]<4>:ub       ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+%2,0)<16;8,2>           // 1st field V from current frame (line 0,2)
        mov (8)  r[pCF_V_OFFSET, %1+1*32]<4>:ub     ubRESP(nDI_CURR_2ND_FIELD_CHROMA_OFFSET,%2*16)<16;8,2>      // 2nd field V from current frame (line 1,3)
    }

SAVE_DN_CURR:
    // Copy the packed rows behind the header (MRF 1..) and issue the data-port write.
    $for (0; <nY_NUM_OF_ROWS/2; 1) {
        mov (8) mudMSGHDR_DN(%1+1)<1>   udDN_YUV(%1)REGION(8,1)
    }
    send (8)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PA_DN_DI+nBI_DESTINATION_YUV:ud
#endif

// Save Processed frames
// NOTE(review): include names in this file use mixed case ("DNDI_Command.asm",
// "DI_Save_PA.asm"); verify they match the on-disk names on case-sensitive
// file systems.
#include "DI_Save_PA.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Build-time configuration for the DN-only kernel on packed (PA) input.
// DI_DISABLE is defined before DNDI.inc is pulled in.
#define DI_DISABLE

#include "DNDI.inc"

// Block geometry: 8 luma rows and 8 chroma rows per block.
#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS          8                                   // Number of Y rows per block
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS         8                                   // Number of U/V rows per block

// Sampler response length and data-port write block sizes.
#undef  nSMPL_RESP_LEN
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DN_PA                // Number of GRFs in the DNDI response
#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_32+nBLOCK_HEIGHT_8     // DN Curr block size for write is 32x8
#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_2      // HIST block size for write is 4x2
////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
#include "DNDI_COMMAND.asm"

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DNDI_Hist_Save.asm"

////////////////////////////////////// Pack and Save the DN Curr Frame for Next Run ///////////////
// Four plane offsets = ubDEST_CF_OFFSET[0..3] + npDN_YUV, stored from pCF_Y_OFFSET onwards.
add  (4)    pCF_Y_OFFSET<1>:uw      ubDEST_CF_OFFSET<4;4,1>:ub      npDN_YUV:w

// Luma: one 16-pixel line per iteration, written with destination stride 2.
$for (0; <nY_NUM_OF_ROWS; 1) {
    mov (16)    r[pCF_Y_OFFSET, %1*32]<2>:ub    ubRESP(nNODI_LUMA_OFFSET,%1*16)<16;16,1>        // copy line of Y
}

// Chroma: U is read from byte offset +1 and V from byte offset +0 of each
// chroma line (source stride 2); both are written with destination stride 4.
$for (0; <nUV_NUM_OF_ROWS; 1) {
    mov (8)     r[pCF_U_OFFSET, %1*32]<4>:ub    ubRESP(nNODI_CHROMA_OFFSET,%1*16+1)<16;8,2>     // copy line of U
    mov (8)     r[pCF_V_OFFSET, %1*32]<4>:ub    ubRESP(nNODI_CHROMA_OFFSET,%1*16)<16;8,2>       // copy line of V
}

// Data-port write header
shl  (1)    rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin * 2 (422 output)
mov  (1)    rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
mov  (1)    rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (32x8)
mov  (8)    mMSGHDR_DN<1>:ud        rMSGSRC<8;8,1>:ud               // message header

// Payload: two registers per iteration (SIMD16 mov, loop step 2), placed after the header.
$for (0; <nY_NUM_OF_ROWS; 2) {
    mov (16)    mudMSGHDR_DN(1+%1)<1>   udDN_YUV(%1)REGION(8,1)     // Move DN Curr to MRF
}
send (8)    dNULLREG    mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PA_DN_NODI+nBI_DESTINATION_YUV:ud
i965_drv_video/shaders/post_processing/Core_Kernels/PA_Scaling.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PA_Scaling.asm ----------
// One-time setup executed ahead of the per-row sampling loop: coordinate
// ramp, sampler message header, start coordinates and the constants consumed
// by the later "line" instructions.
#include "Scaling.inc"

// Build a 16-element ramp (0..15) in float32.
// Disabled integer-vector variant:
//  mov (8) SAMPLER_RAMP(0)<1>      0x76543210:v
//  add (8) SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f
mov  (4)    SAMPLER_RAMP(0)<1>      0x48403000:vf       // 3, 2, 1, 0 in float vector
mov  (4)    SAMPLER_RAMP(0,4)<1>    0x5C585450:vf       // 7, 6, 5, 4 in float vector
add  (8)    SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f   // elements 8..15 = elements 0..7 + 8

// Module: PrepareScaleCoord.asm
// Sampler message header: the first three dwords are cleared.
mov  (2)    rMSGSRC.0<1>:ud         0:ud        { NoDDClr }     // Unused fields
mov  (1)    rMSGSRC.2<1>:ud         0:ud        { NoDDChk }     // Write and offset

// 16 v coordinates: every lane starts at the vertical origin. The same value
// goes to the message payload and to the running SCALE_COORD_Y copy.
mov  (16)   mfMSGPAYLOAD(2)<1>      fSRC_VID_V_ORI<0;1,0>:f
mov  (16)   SCALE_COORD_Y<1>:f      fSRC_VID_V_ORI<0;1,0>:f

// 16 u coordinates: horizontal origin + step X * ramp, accumulated through acc0.
// Disabled single-instruction variant:
// line (16) mfMSGPAYLOAD(0)<1>     SCALE_STEP_X<0;1,0>:f   SAMPLER_RAMP(0)     // Assign to mrf directly
mov  (16)   acc0:f                  fSRC_VID_H_ORI<0;1,0>:f                     { Compr }
mac  (16)   mfMSGPAYLOAD(0)<1>      fVIDEO_STEP_X<0;1,0>:f  SAMPLER_RAMP(0)     { Compr }

// Constants for the "line" instruction
mov  (1)    SCALE_LINE_P255<1>:f    255.0:f     { NoDDClr }     // { NoDDClr, NoDDChk }
mov  (1)    SCALE_LINE_P0_5<1>:f    0.5:f       { NoDDChk }
//------------------------------------------------------------------------------
// One iteration per output row: sample 16 pixels, advance the v coordinate,
// convert the float response and store one row each of V, Y and U.
$for (0; <nY_NUM_OF_ROWS; 1) {
    // Read 16 sampled pixels; they come back as float32 in 8 GRFs in BGRA (VYUA) order.
    mov  (8)    MSGHDR_SCALE.0:ud           rMSGSRC.0<8;8,1>:ud     // Copy msg header and payload mirrors to MRFs
    send (16)   SCALE_RESPONSE_YW(0)<1>     MSGHDR_SCALE    udDUMMY_NULL    nSMPL_ENGINE    SMPLR_MSG_DSC+nSI_SRC_SIMD16_YUV+nBI_CURRENT_SRC_YUV

    // v coordinates of the next line: written to the message payload and to the running copy.
    add  (16)   mfMSGPAYLOAD(2)<1>          SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f  // next-line v -> message payload
    add  (16)   SCALE_COORD_Y<1>:f          SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f  // advance the running v coordinate

    // Scale back to [0, 255], convert f to ud
    line (16)   acc0:f                      SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(0)    { Compr }   // Process B, V
    mov  (16)   SCALE_RESPONSE_YD(0)<1>     acc0:f                                              { Compr }

    line (16)   acc0:f                      SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(2)    { Compr }   // Process G, Y
    mov  (16)   SCALE_RESPONSE_YD(2)<1>     acc0:f                                              { Compr }

    line (16)   acc0:f                      SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(4)    { Compr }   // Process R, U
    mov  (16)   SCALE_RESPONSE_YD(4)<1>     acc0:f                                              { Compr }

    // Store the converted channels into row %1 of each destination plane.
    mov  (16)   DEST_V(%1)<1>               SCALE_RESPONSE_YB(0)    // possible error due to truncation - vK
    mov  (16)   DEST_Y(%1)<1>               SCALE_RESPONSE_YB(2)    // possible error due to truncation - vK
    mov  (16)   DEST_U(%1)<1>               SCALE_RESPONSE_YB(4)    // possible error due to truncation - vK
}

#define nSRC_REGION nREGION_1
//------------------------------------------------------------------------------
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL2_AVS_IEF_16x8.asm ----------
#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// Four sampler messages in total:
//   2 sampler reads for Y, one per 8x8 block
//   2 sampler reads for U and V, one per 8x8 block (NV11\P208 input surface)
// Before each message, dword 2 of the header selects the channels to return.
//------------------------------------------------------------------------------

    // ---- 1st 8x8 block -------------------------------------------------------
    #include "AVS_SetupFirstBlock.asm"

    // Y sampling: enable green channel only
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud
    mov  (16)   mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(0)<1>    mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
    // Return Y in 4 GRFs

    // U and V sampling: enable red and blue channels
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_RED_BLUE_CHANNELS:ud
    mov  (16)   mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(4)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV
    // Return U and V in 8 GRFs

    // ---- 2nd 8x8 block -------------------------------------------------------
    #include "AVS_SetupSecondBlock.asm"

    // 2nd Y sampling: enable green channel only
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud
    mov  (16)   mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(0)<1>  mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y

    // 2nd U and V sampling: enable red and blue channels
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_RED_BLUE_CHANNELS:ud
    mov  (16)   mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(4)<1>  mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV
    // Return U and V in 8 GRFs

//------------------------------------------------------------------------------
// Unpacking sampler reads to 4:4:4 internal planar
//------------------------------------------------------------------------------
    #include "PL2_AVS_IEF_Unpack_16x8.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL2_AVS_IEF_8x4.asm ----------
#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// Three sampler messages in total:
//   2 sampler reads for Y, one per 8x8 block
//   1 sampler read for 8x8 U and 8x8 V (NV11\NV12 input surface)
// Before each message, dword 2 of the header selects the channels to return.
//------------------------------------------------------------------------------

    // ---- 1st 8x8 block -------------------------------------------------------
    #include "AVS_SetupFirstBlock.asm"

    // Y sampling: enable green channel only
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud
    mov  (16)   mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(0)<1>    mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
    // Return Y in 4 GRFs

    // 8x8 U and V sampling: enable red and blue channels
    // Only 8x4 will be used
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_RED_BLUE_CHANNELS:ud

    // Calculate Chroma Step Size:
    //   H direction: 16 Luma samples are covered by 8 Chroma samples, thus Chroma_Step_X = 2 * Luma_Step_X
    //   V direction: 8 Luma samples are covered by 8 Chroma samples, thus Chroma_Step_Y = Luma_Step_Y
    mul  (1)    rAVS_PAYLOAD.1:f        fVIDEO_STEP_X:f     2.0:f   // Step X for chroma

    mov  (16)   mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(4)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV
    // Return U and V in 8 GRFs

    // ---- 2nd 8x8 block -------------------------------------------------------
    #include "AVS_SetupSecondBlock.asm"

    // 2nd Y sampling: enable green channel only
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud
    mov  (16)   mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(0)<1>  mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y

//------------------------------------------------------------------------------
// Unpacking sampler reads to 4:2:0 internal planar
//------------------------------------------------------------------------------
    #include "PL2_AVS_IEF_Unpack_8x4.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL2_AVS_IEF_8x8.asm ----------
#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// Three sampler messages in total:
//   2 sampler reads for Y, one per 8x8 block
//   1 sampler read for 8x8 U and 8x8 V (NV11\NV12 input surface)
// Before each message, dword 2 of the header selects the channels to return.
//------------------------------------------------------------------------------

    // ---- 1st 8x8 block -------------------------------------------------------
    #include "AVS_SetupFirstBlock.asm"

    // Y sampling: enable green channel only
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud
    mov  (16)   mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(0)<1>    mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
    // Return Y in 4 GRFs

    // 8x8 U and V sampling: enable red and blue channels
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_RED_BLUE_CHANNELS:ud

    // Calculate Chroma Step Size:
    //   H direction: 16 Luma samples are covered by 8 Chroma samples, thus Chroma_Step_X = 2 * Luma_Step_X
    //   V direction: 8 Luma samples are covered by 8 Chroma samples, thus Chroma_Step_Y = Luma_Step_Y
    mul  (1)    rAVS_PAYLOAD.1:f        fVIDEO_STEP_X:f     2.0:f   // Step X for chroma

    mov  (16)   mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(4)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_2CH+nSI_SRC_UV+nBI_CURRENT_SRC_UV
    // Return U and V in 8 GRFs

    // ---- 2nd 8x8 block -------------------------------------------------------
    #include "AVS_SetupSecondBlock.asm"

    // 2nd Y sampling: enable green channel only
    mov  (1)    rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud
    mov  (16)   mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud    // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(0)<1>  mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y

//------------------------------------------------------------------------------
// Unpacking sampler reads to 4:2:2 internal planar
//------------------------------------------------------------------------------
    #include "PL2_AVS_IEF_Unpack_8x8.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//----------
PL2_AVS_IEF_Unpack_16x8.asm
----------
#
ifdef
AVS_OUTPUT_16_BIT
//
Output
is
packed
in
AVYU
format
//
Move
first
8
x8
word
s
of
Y
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
1
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
4
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
5
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
8
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
9
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
12
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
13
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
16
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
17
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
20
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
21
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
24
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
25
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
28
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
29
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
12
)
<
4
;4,1>
//
Move
first
8
x8
word
s
of
U
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
1
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
4
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
5
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
8
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
9
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
12
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
13
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
16
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
17
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
20
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
21
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
24
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
25
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
28
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
29
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
12
)
<
4
;4,1>
//
Move
first
8
x8
word
s
of
V
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
2
)
<
4
>
uwAVS_RESPONSE
(
6
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
1
,
2
)
<
4
>
uwAVS_RESPONSE
(
6
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
4
,
2
)
<
4
>
uwAVS_RESPONSE
(
6
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
5
,
2
)
<
4
>
uwAVS_RESPONSE
(
6
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
8
,
2
)
<
4
>
uwAVS_RESPONSE
(
7
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
9
,
2
)
<
4
>
uwAVS_RESPONSE
(
7
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
12
,
2
)
<
4
>
uwAVS_RESPONSE
(
7
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
13
,
2
)
<
4
>
uwAVS_RESPONSE
(
7
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
16
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
17
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
20
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
21
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
24
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
25
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
28
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
29
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
12
)
<
4
;4,1>
//
Move
first
8
x8
word
s
of
A
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
1
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
4
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
5
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
8
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
9
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
12
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
13
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
16
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
17
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
20
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
21
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
24
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
25
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
28
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
29
,
3
)
<
4
>
0
:
uw
//
Move
second
8
x8
word
s
of
Y
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
3
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
6
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
7
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
10
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
11
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
14
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
15
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
18
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
19
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
22
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
23
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
26
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
27
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
30
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
31
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
12
)
<
4
;4,1>
//
Move
second
8
x8
word
s
of
U
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
3
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
6
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
7
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
10
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
11
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
14
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
15
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
18
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
19
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
22
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
23
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
26
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
27
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
30
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
31
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
12
)
<
4
;4,1>
//
Move
second
8
x8
word
s
of
V
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
3
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
6
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
7
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
10
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
11
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
14
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
15
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
18
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
19
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
22
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
23
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
26
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
27
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
30
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
31
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
12
)
<
4
;4,1>
//
Move
second
8
x8
word
s
of
A
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
3
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
6
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
7
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
10
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
11
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
14
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
15
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
18
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
19
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
22
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
23
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
26
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
27
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
30
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
31
,
3
)
<
4
>
0
:
uw
/*
This
section
will
be
used
if
16
-
bit
output
is
needed
in
planar
format
-
vK
//
Move
1
st
8
x8
word
s
of
Y
to
dest
GRF
at
lower
8
word
s
of
each
GRF.
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
)
<
1
>
uwAVS_RESPONSE
(
%
1
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
)
<
1
>
uwAVS_RESPONSE
(
%
1
,
8
)
<
8
;4,1>
}
//
Move
1
st
8
x8
word
s
of
U
to
dest
GRF
(
Copy
high
byte
in
a
word
)
mov
(
8
)
uwDEST_U
(
0
)
<
1
>
uwAVS_RESPONSE
(
4
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
1
)
<
1
>
uwAVS_RESPONSE
(
4
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
2
)
<
1
>
uwAVS_RESPONSE
(
5
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
3
)
<
1
>
uwAVS_RESPONSE
(
5
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
4
)
<
1
>
uwAVS_RESPONSE
(
8
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
5
)
<
1
>
uwAVS_RESPONSE
(
8
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
6
)
<
1
>
uwAVS_RESPONSE
(
9
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
7
)
<
1
>
uwAVS_RESPONSE
(
9
,
8
)
<
8
;4,1>
//
Move
1
st
8
x8
word
s
of
V
to
dest
GRF
mov
(
8
)
uwDEST_V
(
0
)
<
1
>
uwAVS_RESPONSE
(
6
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
1
)
<
1
>
uwAVS_RESPONSE
(
6
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
2
)
<
1
>
uwAVS_RESPONSE
(
7
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
3
)
<
1
>
uwAVS_RESPONSE
(
7
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
4
)
<
1
>
uwAVS_RESPONSE
(
10
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
5
)
<
1
>
uwAVS_RESPONSE
(
10
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
6
)
<
1
>
uwAVS_RESPONSE
(
11
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
7
)
<
1
>
uwAVS_RESPONSE
(
11
,
8
)
<
8
;4,1>
//
Move
2
nd
8
x8
word
s
of
Y
to
dest
GRF
at
higher
8
word
s
of
each
GRF.
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
,
8
)
<
8
;4,1>
}
//
Move
2
nd
8
x8
word
s
of
U
to
dest
GRF
(
Copy
high
byte
in
a
word
)
mov
(
8
)
uwDEST_U
(
0
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
4
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
1
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
4
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
2
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
5
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
3
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
5
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
4
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
8
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
5
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
8
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
6
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
9
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
7
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
9
,
8
)
<
8
;4,1>
//
Move
2
nd
8
x8
word
s
of
V
to
dest
GRF
mov
(
8
)
uwDEST_V
(
0
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
6
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
1
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
6
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
2
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
7
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
3
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
7
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
4
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
10
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
5
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
10
,
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
6
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
11
,
0
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
7
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
11
,
8
)
<
8
;4,1>
*/
#
else
//
Move
1
st
8
x8
word
s
of
Y
to
dest
GRF
at
lower
8
word
s
of
each
GRF.
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
)
<
1
>
ubAVS_RESPONSE
(
%
1
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
)
<
1
>
ubAVS_RESPONSE
(
%
1
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
//
Move
1
st
8
x8
word
s
of
U
to
dest
GRF
(
Copy
high
byte
in
a
word
)
mov
(
8
)
uwDEST_U
(
0
)
<
1
>
ubAVS_RESPONSE
(
4
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
1
)
<
1
>
ubAVS_RESPONSE
(
4
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
2
)
<
1
>
ubAVS_RESPONSE
(
5
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
3
)
<
1
>
ubAVS_RESPONSE
(
5
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
4
)
<
1
>
ubAVS_RESPONSE
(
8
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
5
)
<
1
>
ubAVS_RESPONSE
(
8
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
6
)
<
1
>
ubAVS_RESPONSE
(
9
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
7
)
<
1
>
ubAVS_RESPONSE
(
9
,
8
+
1
)
<
16
;4,2>
//
Move
1
st
8
x8
word
s
of
V
to
dest
GRF
mov
(
8
)
uwDEST_V
(
0
)
<
1
>
ubAVS_RESPONSE
(
6
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
1
)
<
1
>
ubAVS_RESPONSE
(
6
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
2
)
<
1
>
ubAVS_RESPONSE
(
7
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
3
)
<
1
>
ubAVS_RESPONSE
(
7
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
4
)
<
1
>
ubAVS_RESPONSE
(
10
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
5
)
<
1
>
ubAVS_RESPONSE
(
10
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
6
)
<
1
>
ubAVS_RESPONSE
(
11
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
7
)
<
1
>
ubAVS_RESPONSE
(
11
,
8
+
1
)
<
16
;4,2>
//
Move
2
nd
8
x8
word
s
of
Y
to
dest
GRF
at
higher
8
word
s
of
each
GRF.
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
//
Move
2
nd
8
x8
word
s
of
U
to
dest
GRF
(
Copy
high
byte
in
a
word
)
mov
(
8
)
uwDEST_U
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
4
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
4
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
5
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
5
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
8
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
8
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
9
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_U
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
9
,
8
+
1
)
<
16
;4,2>
//
Move
2
nd
8
x8
word
s
of
V
to
dest
GRF
mov
(
8
)
uwDEST_V
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
6
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
6
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
7
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
7
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
10
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
10
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
11
,
1
)
<
16
;4,2>
mov
(
8
)
uwDEST_V
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
11
,
8
+
1
)
<
16
;4,2>
#
endif
//
Re
-
define
new
#
of
lines
#
undef
nUV_NUM_OF_ROWS
#
undef
nY_NUM_OF_ROWS
#
define
nY_NUM_OF_ROWS
8
#
define
nUV_NUM_OF_ROWS
8
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL2_AVS_IEF_Unpack_8x4.asm ----------
//
// Unpacks the AVS sampler responses into the planar destination registers:
//   - two 8x8 luma blocks  -> uwDEST_Y (first block in the lower 8 words of each
//                             GRF, second block in the upper 8 words)
//   - 8x4 chroma U and V   -> uwDEST_U / uwDEST_V
// Every source operand is the byte view of the response at byte offset 1 (or 8+1)
// with a horizontal stride of 2, i.e. only the high byte of each 16-bit sample is
// copied into the destination word.
// NOTE(review): register/sub-register meaning of the NAME(r, s) macros comes from
// headers outside this file -- confirm against AVS_IEF.inc.

    // First 8x8 block of Y: lower 8 words of each destination GRF.
    $for(0; <8/2; 1) {
        mov (8)     uwDEST_Y(%1*2)<1>           ubAVS_RESPONSE(%1,1)<16;4,2>        // Copy high byte in a word
        mov (8)     uwDEST_Y(%1*2+1)<1>         ubAVS_RESPONSE(%1,8+1)<16;4,2>      // Copy high byte in a word
    }

    // 8x4 block of U (high byte of each word). Only the byte-offset-1 region of
    // each response register is read here; the 8+1 regions are not used.
    mov (8)     uwDEST_U(0)<1>                  ubAVS_RESPONSE(4,1)<16;4,2>
    mov (8)     uwDEST_U(0,8)<1>                ubAVS_RESPONSE(5,1)<16;4,2>
    mov (8)     uwDEST_U(1)<1>                  ubAVS_RESPONSE(8,1)<16;4,2>
    mov (8)     uwDEST_U(1,8)<1>                ubAVS_RESPONSE(9,1)<16;4,2>

    // 8x4 block of V, same layout as U but from response registers 6, 7, 10, 11.
    mov (8)     uwDEST_V(0)<1>                  ubAVS_RESPONSE(6,1)<16;4,2>
    mov (8)     uwDEST_V(0,8)<1>                ubAVS_RESPONSE(7,1)<16;4,2>
    mov (8)     uwDEST_V(1)<1>                  ubAVS_RESPONSE(10,1)<16;4,2>
    mov (8)     uwDEST_V(1,8)<1>                ubAVS_RESPONSE(11,1)<16;4,2>

    // Second 8x8 block of Y: upper 8 words of each destination GRF.
    $for(0; <8/2; 1) {
        mov (8)     uwDEST_Y(%1*2,8)<1>         ubAVS_RESPONSE_2(%1,1)<16;4,2>      // Copy high byte in a word
        mov (8)     uwDEST_Y(%1*2+1,8)<1>       ubAVS_RESPONSE_2(%1,8+1)<16;4,2>    // Copy high byte in a word
    }

//------------------------------------------------------------------------------

    // Publish the row counts produced by this unpack step: 8 luma rows, 4 chroma rows.
    #undef  nUV_NUM_OF_ROWS
    #undef  nY_NUM_OF_ROWS

    #define nY_NUM_OF_ROWS      8
    #define nUV_NUM_OF_ROWS     4
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL2_AVS_IEF_Unpack_8x8.asm ----------
//
// Unpacks the AVS sampler responses into the planar destination registers:
//   - two 8x8 luma blocks  -> uwDEST_Y (first block in the lower 8 words of each
//                             GRF, second block in the upper 8 words)
//   - 8x8 chroma U and V   -> uwDEST_U / uwDEST_V
// Every source operand is the byte view of the response at byte offset 1 or 8+1
// with a horizontal stride of 2, i.e. only the high byte of each 16-bit sample is
// copied into the destination word.
// NOTE(review): register/sub-register meaning of the NAME(r, s) macros comes from
// headers outside this file -- confirm against AVS_IEF.inc.

    // First 8x8 block of Y: lower 8 words of each destination GRF.
    $for(0; <8/2; 1) {
        mov (8)     uwDEST_Y(%1*2)<1>           ubAVS_RESPONSE(%1,1)<16;4,2>        // Copy high byte in a word
        mov (8)     uwDEST_Y(%1*2+1)<1>         ubAVS_RESPONSE(%1,8+1)<16;4,2>      // Copy high byte in a word
    }

    // 8x8 block of U (high byte of each word); both halves (offset 1 and 8+1) of
    // response registers 4, 5, 8 and 9 are consumed.
    mov (8)     uwDEST_U(0)<1>                  ubAVS_RESPONSE(4,1)<16;4,2>
    mov (8)     uwDEST_U(0,8)<1>                ubAVS_RESPONSE(4,8+1)<16;4,2>
    mov (8)     uwDEST_U(1)<1>                  ubAVS_RESPONSE(5,1)<16;4,2>
    mov (8)     uwDEST_U(1,8)<1>                ubAVS_RESPONSE(5,8+1)<16;4,2>
    mov (8)     uwDEST_U(2)<1>                  ubAVS_RESPONSE(8,1)<16;4,2>
    mov (8)     uwDEST_U(2,8)<1>                ubAVS_RESPONSE(8,8+1)<16;4,2>
    mov (8)     uwDEST_U(3)<1>                  ubAVS_RESPONSE(9,1)<16;4,2>
    mov (8)     uwDEST_U(3,8)<1>                ubAVS_RESPONSE(9,8+1)<16;4,2>

    // 8x8 block of V, same layout as U but from response registers 6, 7, 10, 11.
    mov (8)     uwDEST_V(0)<1>                  ubAVS_RESPONSE(6,1)<16;4,2>
    mov (8)     uwDEST_V(0,8)<1>                ubAVS_RESPONSE(6,8+1)<16;4,2>
    mov (8)     uwDEST_V(1)<1>                  ubAVS_RESPONSE(7,1)<16;4,2>
    mov (8)     uwDEST_V(1,8)<1>                ubAVS_RESPONSE(7,8+1)<16;4,2>
    mov (8)     uwDEST_V(2)<1>                  ubAVS_RESPONSE(10,1)<16;4,2>
    mov (8)     uwDEST_V(2,8)<1>                ubAVS_RESPONSE(10,8+1)<16;4,2>
    mov (8)     uwDEST_V(3)<1>                  ubAVS_RESPONSE(11,1)<16;4,2>
    mov (8)     uwDEST_V(3,8)<1>                ubAVS_RESPONSE(11,8+1)<16;4,2>

    // Second 8x8 block of Y: upper 8 words of each destination GRF.
    $for(0; <8/2; 1) {
        mov (8)     uwDEST_Y(%1*2,8)<1>         ubAVS_RESPONSE_2(%1,1)<16;4,2>      // Copy high byte in a word
        mov (8)     uwDEST_Y(%1*2+1,8)<1>       ubAVS_RESPONSE_2(%1,8+1)<16;4,2>    // Copy high byte in a word
    }

//------------------------------------------------------------------------------

    // Publish the row counts produced by this unpack step: 8 luma rows, 8 chroma rows.
    #undef  nUV_NUM_OF_ROWS
    #undef  nY_NUM_OF_ROWS

    #define nY_NUM_OF_ROWS      8
    #define nUV_NUM_OF_ROWS     8
i965_drv_video/shaders/post_processing/Core_Kernels/PL2_Scaling.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL2_Scaling.asm ----------
//
// Samples nY_NUM_OF_ROWS rows of 16 pixels through the sampler (one SIMD16 message
// for the Y surface and one for the UV surface per row), rescales the float
// results and stores them as bytes into DEST_Y / DEST_U / DEST_V.
#include "Scaling.inc"

    // Build a 16-element float32 ramp in SAMPLER_RAMP(0..1): two packed-float (:vf)
    // immediates give 0..7, then adding 8.0 gives 8..15.
    // Earlier form, kept for reference:
    //  mov (8)     SAMPLER_RAMP(0)<1>      0x76543210:v
    //  add (8)     SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f
    mov (4)     SAMPLER_RAMP(0)<1>      0x48403000:vf       // 3, 2, 1, 0 in float vector
    mov (4)     SAMPLER_RAMP(0,4)<1>    0x5C585450:vf       // 7, 6, 5, 4 in float vector
    add (8)     SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f

//  Module: PrepareScaleCoord.asm

    // Zero the first three dwords of the sampler message header source.
    mov (2)     rMSGSRC.0<1>:ud         0:ud    { NoDDClr }     // Unused fields
    mov (1)     rMSGSRC.2<1>:ud         0:ud    { NoDDChk }     // Write and offset

    // Initial v coordinate for all 16 pixels = vertical origin. It is written both
    // to the message payload and to SCALE_COORD_Y, the running copy that the row
    // loop advances.
    mov (16)    mfMSGPAYLOAD(2)<1>      fSRC_VID_V_ORI<0;1,0>:f
    mov (16)    SCALE_COORD_Y<1>:f      fSRC_VID_V_ORI<0;1,0>:f

    // u coordinates for the 16 pixels: horizontal origin + step X * ramp, computed
    // through the accumulator (mov to acc0, then mac).
    // Earlier single-instruction form, kept for reference:
    //  line (16)   mfMSGPAYLOAD(0)<1>  SCALE_STEP_X<0;1,0>:f   SAMPLER_RAMP(0)     // Assign to mrf directly
    mov (16)    acc0:f                  fSRC_VID_H_ORI<0;1,0>:f                     { Compr }
    mac (16)    mfMSGPAYLOAD(0)<1>      fVIDEO_STEP_X<0;1,0>:f  SAMPLER_RAMP(0)     { Compr }

    // Constants consumed by the `line` instructions in the row loop.
    // NOTE(review): presumably these are the scale (255.0) and rounding bias (0.5)
    // of one `line` source register -- confirm the layout in Scaling.inc.
    mov (1)     SCALE_LINE_P255<1>:f    255.0:f     { NoDDClr }     // { NoDDClr, NoDDChk }
    mov (1)     SCALE_LINE_P0_5<1>:f    0.5:f       { NoDDChk }

//------------------------------------------------------------------------------

    $for(0; <nY_NUM_OF_ROWS; 1) {
        // Read 16 sampled pixels and store them in float32 in 8 GRFs in the order of BGRA (VYUA).
        mov (8)     MSGHDR_SCALE.0:ud           rMSGSRC.0<8;8,1>:ud     // Copy msg header and payload mirrors to MRFs
        send (16)   SCALE_RESPONSE_YW(0)<1>     MSGHDR_SCALE    udDUMMY_NULL    nSMPL_ENGINE    SMPLR_MSG_DSC+nSI_SRC_SIMD16_Y+nBI_CURRENT_SRC_Y
        send (16)   SCALE_RESPONSE_UW(0)<1>     MSGHDR_SCALE    udDUMMY_NULL    nSMPL_ENGINE    SMPLR_MSG_DSC+nSI_SRC_SIMD16_UV+nBI_CURRENT_SRC_UV

        // Advance v by one step for the next row: once into the message payload,
        // once into the running coordinate.
        add (16)    mfMSGPAYLOAD(2)<1>          SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f  // Next row's v, written to the payload
        add (16)    SCALE_COORD_Y<1>:f          SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f  // Update the running v coordinate

        // Scale back to [0, 255], convert f to ud
        line (16)   acc0:f                      SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(0)    { Compr }   // Y response
        mov (16)    SCALE_RESPONSE_YD(0)<1>     acc0:f                                              { Compr }

        line (16)   acc0:f                      SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_UF(0)    { Compr }   // UV response, regs from 0 (stored to DEST_U below)
        mov (16)    SCALE_RESPONSE_UD(0)<1>     acc0:f                                              { Compr }

        line (16)   acc0:f                      SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_UF(2)    { Compr }   // UV response, regs from 2 (stored to DEST_V below)
        mov (16)    SCALE_RESPONSE_UD(2)<1>     acc0:f                                              { Compr }

        // Store row %1 of each plane from the byte view of the converted responses.
        mov (16)    DEST_Y(%1)<1>               SCALE_RESPONSE_YB(0)        // possible error due to truncation - vK
        mov (16)    DEST_U(%1)<1>               SCALE_RESPONSE_UB(0)        // possible error due to truncation - vK
        mov (16)    DEST_V(%1)<1>               SCALE_RESPONSE_UB(2)        // possible error due to truncation - vK
    }

    #define nSRC_REGION nREGION_1
//------------------------------------------------------------------------------
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL3_AVS_IEF_16x8.asm ----------
//
// Issues six single-channel AVS sampler reads (Y, U, V for each of two 8x8 blocks)
// and then includes the unpack step that converts the responses to the internal
// planar layout.
#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 Y surface
// 2 sampler reads for 8x8 U surface
// 2 sampler reads for 8x8 V surface
//------------------------------------------------------------------------------

    // ---- First 8x8 block -----------------------------------------------------
    #include "AVS_SetupFirstBlock.asm"

    // Y: response lands in uwAVS_RESPONSE(0).
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud      // Enable green channel
    mov (16)    mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(0)<1>    mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
                // Return Y in 4 GRFs

    // U: response lands in uwAVS_RESPONSE(4).
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Enable red channel
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(4)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U
                // Return U in 4 GRFs

    // V: response lands in uwAVS_RESPONSE(8). The channel mask is already "red";
    // the first mov is kept only as a spacer between the two sends.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Dummy instruction to avoid back-2-back send instructions
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(8)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V
                // Return V in 4 GRFs

    // ---- Second 8x8 block ----------------------------------------------------
    #include "AVS_SetupSecondBlock.asm"

    // Y: response lands in uwAVS_RESPONSE_2(0).
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud      // Enable green channel
    mov (16)    mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(0)<1>  mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
                // Return Y in 4 GRFs

    // U: response lands in uwAVS_RESPONSE_2(4).
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Enable red channel
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(4)<1>  mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U
                // Return U in 4 GRFs

    // V: response lands in uwAVS_RESPONSE_2(8); spacer mov as for the first block.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Dummy instruction just in order to avoid back-2-back send instructions!
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE_2(8)<1>  mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V
                // Return V in 4 GRFs

//------------------------------------------------------------------------------
// Unpacking sampler reads to 4:4:4 internal planar
//------------------------------------------------------------------------------
    #include "PL3_AVS_IEF_Unpack_16x8.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL3_AVS_IEF_8x4.asm ----------
//
// Issues four single-channel AVS sampler reads (Y for two 8x8 blocks, U and V once
// each with the X step doubled) and then includes the unpack step that converts
// the responses to the internal planar layout.
#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 Y surface
// 1 sampler read for 8x8 U surface
// 1 sampler read for 8x8 V surface
//------------------------------------------------------------------------------

    // ---- First 8x8 block -----------------------------------------------------
    #include "AVS_SetupFirstBlock.asm"

    // Y: response lands in uwAVS_RESPONSE(0).
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud      // Enable green channel
    mov (16)    mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(0)<1>    mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
                // Return Y in 4 GRFs

    // U (8x8 is sampled; only 8x4 will be used): response lands in uwAVS_RESPONSE(4).
    // The payload's X step is doubled for chroma before the header/payload copy.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Enable red channel
    mul (1)     rAVS_PAYLOAD.1:f        fVIDEO_STEP_X:f     2.0:f       // Calculate Step X for chroma
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(4)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U
                // Return U in 4 GRFs

    // V (8x8 is sampled; only 8x4 will be used): response lands in uwAVS_RESPONSE(8).
    // The channel mask is already "red"; the first mov is kept only as a spacer
    // between the two sends.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Dummy instruction just in order to avoid back-2-back send instructions!
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(8)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V
                // Return V in 4 GRFs

    // ---- Second 8x8 block ----------------------------------------------------
    #include "AVS_SetupSecondBlock.asm"

    // Y: response lands in uwAVS_RESPONSE(12). The luma X step is written back
    // into the payload first, undoing the chroma doubling above.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud      // Enable green channel
    mov (1)     rAVS_PAYLOAD.1:f        fVIDEO_STEP_X:f                 // Restore Step X for luma
    mov (16)    mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(12)<1>   mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
                // Return Y in 4 GRFs

//------------------------------------------------------------------------------
// Unpacking sampler reads to 4:2:0 internal planar
//------------------------------------------------------------------------------
    #include "PL3_AVS_IEF_Unpack_8x4.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL3_AVS_IEF_8x8.asm ----------
//
// Issues four single-channel AVS sampler reads (Y for two 8x8 blocks, U and V once
// each with the X step doubled) and then includes the unpack step that converts
// the responses to the internal planar layout.
#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 Y surface
// 1 sampler read for 8x8 U surface
// 1 sampler read for 8x8 V surface
//------------------------------------------------------------------------------

    // ---- First 8x8 block -----------------------------------------------------
    #include "AVS_SetupFirstBlock.asm"

    // Y: response lands in uwAVS_RESPONSE(0).
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud      // Enable green channel
    mov (16)    mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(0)<1>    mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
                // Return Y in 4 GRFs

    // U: response lands in uwAVS_RESPONSE(4). The payload's X step is doubled for
    // chroma before the header/payload copy.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Enable red channel
    mul (1)     rAVS_PAYLOAD.1:f        fVIDEO_STEP_X:f     2.0:f       // Calculate Step X for chroma
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(4)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_U+nBI_CURRENT_SRC_U
                // Return U in 4 GRFs

    // V: response lands in uwAVS_RESPONSE(8). The channel mask is already "red";
    // the first mov is kept only as a spacer between the two sends.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_RED_CHANNEL_ONLY:ud        // Dummy instruction just in order to avoid back-2-back send instructions!
    mov (16)    mAVS_8x8_HDR_UV.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(8)<1>    mAVS_8x8_HDR_UV udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_V+nBI_CURRENT_SRC_V
                // Return V in 4 GRFs

    // ---- Second 8x8 block ----------------------------------------------------
    #include "AVS_SetupSecondBlock.asm"

    // Y: response lands in uwAVS_RESPONSE(12). The luma X step is written back
    // into the payload first, undoing the chroma doubling above.
    mov (1)     rAVS_8x8_HDR.2:ud       nAVS_GREEN_CHANNEL_ONLY:ud      // Enable green channel
    mov (1)     rAVS_PAYLOAD.1:f        fVIDEO_STEP_X:f                 // Restore Step X for luma
    mov (16)    mAVS_8x8_HDR.0:ud       rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1)    uwAVS_RESPONSE(12)<1>   mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_1CH+nSI_SRC_Y+nBI_CURRENT_SRC_Y
                // Return Y in 4 GRFs

//------------------------------------------------------------------------------
// Unpacking sampler reads to 4:2:2 internal planar
//------------------------------------------------------------------------------
    #include "PL3_AVS_IEF_Unpack_8x8.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//----------
PL3_AVS_IEF_Unpack_16x8.asm
----------
#
ifdef
AVS_OUTPUT_16_BIT
//
Output
is
packed
in
AVYU
format
//
Move
first
8
x8
word
s
of
Y
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
1
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
4
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
5
,
1
)
<
4
>
uwAVS_RESPONSE
(
0
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
8
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
9
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
12
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
13
,
1
)
<
4
>
uwAVS_RESPONSE
(
1
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
16
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
17
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
20
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
21
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
24
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
25
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
28
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
29
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
12
)
<
4
;4,1>
//
Move
first
8
x8
word
s
of
U
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
1
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
4
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
5
,
0
)
<
4
>
uwAVS_RESPONSE
(
4
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
8
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
9
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
12
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
13
,
0
)
<
4
>
uwAVS_RESPONSE
(
5
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
16
,
0
)
<
4
>
uwAVS_RESPONSE
(
6
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
17
,
0
)
<
4
>
uwAVS_RESPONSE
(
6
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
20
,
0
)
<
4
>
uwAVS_RESPONSE
(
6
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
21
,
0
)
<
4
>
uwAVS_RESPONSE
(
6
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
24
,
0
)
<
4
>
uwAVS_RESPONSE
(
7
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
25
,
0
)
<
4
>
uwAVS_RESPONSE
(
7
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
28
,
0
)
<
4
>
uwAVS_RESPONSE
(
7
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
29
,
0
)
<
4
>
uwAVS_RESPONSE
(
7
,
12
)
<
4
;4,1>
//
Move
first
8
x8
word
s
of
V
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
2
)
<
4
>
uwAVS_RESPONSE
(
8
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
1
,
2
)
<
4
>
uwAVS_RESPONSE
(
8
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
4
,
2
)
<
4
>
uwAVS_RESPONSE
(
8
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
5
,
2
)
<
4
>
uwAVS_RESPONSE
(
8
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
8
,
2
)
<
4
>
uwAVS_RESPONSE
(
9
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
9
,
2
)
<
4
>
uwAVS_RESPONSE
(
9
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
12
,
2
)
<
4
>
uwAVS_RESPONSE
(
9
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
13
,
2
)
<
4
>
uwAVS_RESPONSE
(
9
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
16
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
17
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
20
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
21
,
2
)
<
4
>
uwAVS_RESPONSE
(
10
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
24
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
25
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
28
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
29
,
2
)
<
4
>
uwAVS_RESPONSE
(
11
,
12
)
<
4
;4,1>
//
Move
first
8
x8
word
s
of
A
to
dest
GRF
(
as
packed
)
mov
(
4
)
uwDEST_Y
(
0
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
1
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
4
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
5
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
8
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
9
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
12
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
13
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
16
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
17
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
20
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
21
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
24
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
25
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
28
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
29
,
3
)
<
4
>
0
:
uw
//
Move
second
8
x8
word
s
of
Y
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
3
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
6
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
7
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
10
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
11
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
14
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
15
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
18
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
19
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
22
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
23
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
26
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
27
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
30
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
31
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
12
)
<
4
;4,1>
//
Move
second
8
x8
word
s
of
U
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
3
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
6
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
7
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
10
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
11
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
14
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
15
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
18
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
19
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
22
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
23
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
26
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
27
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
30
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
31
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
12
)
<
4
;4,1>
//
Move
second
8
x8
word
s
of
V
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
3
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
6
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
7
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
10
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
11
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
14
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
15
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
18
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
19
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
22
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
23
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
12
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
26
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
0
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
27
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
8
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
30
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
4
)
<
4
;4,1>
mov
(
4
)
uwDEST_Y
(
31
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
12
)
<
4
;4,1>
//
Move
second
8
x8
word
s
of
A
to
dest
GRF
mov
(
4
)
uwDEST_Y
(
2
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
3
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
6
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
7
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
10
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
11
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
14
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
15
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
18
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
19
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
22
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
23
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
26
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
27
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
30
,
3
)
<
4
>
0
:
uw
mov
(
4
)
uwDEST_Y
(
31
,
3
)
<
4
>
0
:
uw
/*
This
section
will
be
used
if
16
-
bit
output
is
needed
in
planar
format
-
vK
//
Move
1
st
8
x8
word
s
of
Y
to
dest
GRF
at
lower
8
word
s
of
each
RGF.
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
)
<
1
>
uwAVS_RESPONSE
(
%
1
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
)
<
1
>
uwAVS_RESPONSE
(
%
1
,
8
)
<
8
;4,1>
}
//
Move
8
x8
word
s
of
U
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_U
(
%
1
*
2
)
<
1
>
uwAVS_RESPONSE
(
%
1
+
4
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
%
1
*
2
+
1
)
<
1
>
uwAVS_RESPONSE
(
%
1
+
4
,
8
)
<
8
;4,1>
}
//
Move
8
x8
word
s
of
V
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_V
(
%
1
*
2
)
<
1
>
uwAVS_RESPONSE
(
%
1
+
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
%
1
*
2
+
1
)
<
1
>
uwAVS_RESPONSE
(
%
1
+
8
,
8
)
<
8
;4,1>
}
//
Move
2
nd
8
x8
word
s
of
Y
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
)
<
8
;4,1>
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
,
8
)
<
8
;4,1>
}
//
Move
2
nd
8
x8
word
s
of
U
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_U
(
%
1
*
2
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
+
4
)
<
8
;4,1>
mov
(
8
)
uwDEST_U
(
%
1
*
2
+
1
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
+
4
,
8
)
<
8
;4,1>
}
//
Move
2
nd
8
x8
word
s
of
V
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_V
(
%
1
*
2
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
+
8
)
<
8
;4,1>
mov
(
8
)
uwDEST_V
(
%
1
*
2
+
1
,
8
)
<
1
>
uwAVS_RESPONSE_2
(
%
1
+
8
,
8
)
<
8
;4,1>
}
*/
#
else
/*
OUTPUT_8_BIT
*/
//
Move
1
st
8
x8
word
s
of
Y
to
dest
GRF
at
lower
8
word
s
of
each
GRF.
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
)
<
1
>
ubAVS_RESPONSE
(
%
1
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
)
<
1
>
ubAVS_RESPONSE
(
%
1
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
//
Move
8
x8
word
s
of
U
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_U
(
%
1
*
2
)
<
1
>
ubAVS_RESPONSE
(
%
1
+
4
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_U
(
%
1
*
2
+
1
)
<
1
>
ubAVS_RESPONSE
(
%
1
+
4
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
//
Move
8
x8
word
s
of
V
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_V
(
%
1
*
2
)
<
1
>
ubAVS_RESPONSE
(
%
1
+
8
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_V
(
%
1
*
2
+
1
)
<
1
>
ubAVS_RESPONSE
(
%
1
+
8
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
//
Move
2
nd
8
x8
word
s
of
Y
to
dest
GRF
at
higher
8
word
s
of
each
GRF.
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_Y
(
%
1
*
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_Y
(
%
1
*
2
+
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
//
Move
2
nd
8
x8
word
s
of
U
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_U
(
%
1
*
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
+
4
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_U
(
%
1
*
2
+
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
+
4
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
//
Move
2
nd
8
x8
word
s
of
V
to
dest
GRF
$for
(
0
; <8/2; 1) {
mov
(
8
)
uwDEST_V
(
%
1
*
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
+
8
,
1
)
<
16
;4,2> // Copy high byte in a word
mov
(
8
)
uwDEST_V
(
%
1
*
2
+
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
%
1
+
8
,
8
+
1
)
<
16
;4,2> // Copy high byte in a word
}
#
endif
//------------------------------------------------------------------------------
//
Re
-
define
new
#
of
lines
#
undef
nUV_NUM_OF_ROWS
#
undef
nY_NUM_OF_ROWS
#
define
nY_NUM_OF_ROWS
8
#
define
nUV_NUM_OF_ROWS
8
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL3_AVS_IEF_Unpack_8x4.asm ----------
// Unpack the AVS/IEF response bytes (ubAVS_RESPONSE) into the word-typed
// planar destinations uwDEST_Y / uwDEST_U / uwDEST_V.
// Every source region starts on an odd byte and steps 2 bytes, i.e. the
// high byte of each response word is what gets copied.

// Y, first 8x8 block -> lower 8 words of each destination GRF.
$for(0; <8/2; 1) {
    mov (8)  uwDEST_Y(%1*2)<1>        ubAVS_RESPONSE(%1,1)<16;4,2>        // high byte of each word
    mov (8)  uwDEST_Y(%1*2+1)<1>      ubAVS_RESPONSE(%1,8+1)<16;4,2>      // high byte of each word
}

// U, 8x4 block (high byte of each word), written out row by row.
    mov (8)  uwDEST_U(0)<1>           ubAVS_RESPONSE(4,1)<16;4,2>
    mov (8)  uwDEST_U(0,8)<1>         ubAVS_RESPONSE(4,9)<16;4,2>
    mov (8)  uwDEST_U(1)<1>           ubAVS_RESPONSE(5,1)<16;4,2>
    mov (8)  uwDEST_U(1,8)<1>         ubAVS_RESPONSE(5,9)<16;4,2>

// V, 8x4 block, same layout as U but sourced from response registers 8 and 9.
    mov (8)  uwDEST_V(0)<1>           ubAVS_RESPONSE(8,1)<16;4,2>
    mov (8)  uwDEST_V(0,8)<1>         ubAVS_RESPONSE(8,9)<16;4,2>
    mov (8)  uwDEST_V(1)<1>           ubAVS_RESPONSE(9,1)<16;4,2>
    mov (8)  uwDEST_V(1,8)<1>         ubAVS_RESPONSE(9,9)<16;4,2>

// Y, second 8x8 block -> higher 8 words of each destination GRF.
// The source starts 12 response registers after the first Y block.
$for(0; <8/2; 1) {
    mov (8)  uwDEST_Y(%1*2,8)<1>      ubAVS_RESPONSE(%1+12,1)<16;4,2>     // high byte of each word
    mov (8)  uwDEST_Y(%1*2+1,8)<1>    ubAVS_RESPONSE(%1+12,8+1)<16;4,2>   // high byte of each word
}

//------------------------------------------------------------------------------
// Publish the row counts produced by this unpack step.
#undef  nUV_NUM_OF_ROWS
#undef  nY_NUM_OF_ROWS

#define nY_NUM_OF_ROWS      8
#define nUV_NUM_OF_ROWS     4
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL3_AVS_IEF_Unpack_8x8.asm ----------
// Unpack the AVS/IEF response bytes (ubAVS_RESPONSE) into the word-typed
// planar destinations uwDEST_Y / uwDEST_U / uwDEST_V.
// Every source region starts on an odd byte and steps 2 bytes, i.e. the
// high byte of each response word is what gets copied.

// Y, first 8x8 block -> lower 8 words of each destination GRF.
$for(0; <8/2; 1) {
    mov (8)  uwDEST_Y(%1*2)<1>        ubAVS_RESPONSE(%1,1)<16;4,2>        // high byte of each word
    mov (8)  uwDEST_Y(%1*2+1)<1>      ubAVS_RESPONSE(%1,8+1)<16;4,2>      // high byte of each word
}

// U, 8x8 block: two 8-word rows per destination register.
$for(0; <8/2; 1) {
    mov (8)  uwDEST_U(%1)<1>          ubAVS_RESPONSE(%1+4,1)<16;4,2>      // high byte of each word
    mov (8)  uwDEST_U(%1,8)<1>        ubAVS_RESPONSE(%1+4,8+1)<16;4,2>    // high byte of each word
}

// V, 8x8 block: same layout as U, sourced 8 response registers in.
$for(0; <8/2; 1) {
    mov (8)  uwDEST_V(%1)<1>          ubAVS_RESPONSE(%1+8,1)<16;4,2>      // high byte of each word
    mov (8)  uwDEST_V(%1,8)<1>        ubAVS_RESPONSE(%1+8,8+1)<16;4,2>    // high byte of each word
}

// Y, second 8x8 block -> higher 8 words of each destination GRF.
$for(0; <8/2; 1) {
    mov (8)  uwDEST_Y(%1*2,8)<1>      ubAVS_RESPONSE(%1+12,1)<16;4,2>     // high byte of each word
    mov (8)  uwDEST_Y(%1*2+1,8)<1>    ubAVS_RESPONSE(%1+12,8+1)<16;4,2>   // high byte of each word
}

//------------------------------------------------------------------------------
// Publish the row counts produced by this unpack step.
#undef  nUV_NUM_OF_ROWS
#undef  nY_NUM_OF_ROWS

#define nY_NUM_OF_ROWS      8
#define nUV_NUM_OF_ROWS     8
i965_drv_video/shaders/post_processing/Core_Kernels/PL3_Scaling.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- PL3_Scaling.asm ----------
#include "Scaling.inc"

// 16-element float32 ramp: SAMPLER_RAMP(0) holds 0..7 and SAMPLER_RAMP(1)
// holds SAMPLER_RAMP(0) + 8.0. Earlier formulation, kept for reference:
//     mov (8)  SAMPLER_RAMP(0)<1>      0x76543210:v
//     add (8)  SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f
mov (4)  SAMPLER_RAMP(0)<1>      0x48403000:vf   { NoDDClr }     // packed floats 3, 2, 1, 0
mov (4)  SAMPLER_RAMP(0,4)<1>    0x5C585450:vf   { NoDDChk }     // packed floats 7, 6, 5, 4
add (8)  SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f

// Module: PrepareScaleCoord.asm

// Sampler message header: clear the first three dwords of rMSGSRC.
mov (2)  rMSGSRC.0<1>:ud         0:ud            { NoDDClr }     // unused fields
mov (1)  rMSGSRC.2<1>:ud         0:ud            { NoDDChk }     // write and offset

// 16 v coordinates: start every lane at the vertical origin. One copy goes
// to the message payload, one is kept in SCALE_COORD_Y for stepping later.
mov (16) mfMSGPAYLOAD(2)<1>      fSRC_VID_V_ORI<0;1,0>:f
mov (16) SCALE_COORD_Y<1>:f      fSRC_VID_V_ORI<0;1,0>:f

// 16 u coordinates: horizontal origin + step X * ramp, via acc0 and mac.
// Earlier formulation, kept for reference:
//     line (16) mfMSGPAYLOAD(0)<1>  SCALE_STEP_X<0;1,0>:f  SAMPLER_RAMP(0)
mov (16) acc0:f                  fSRC_VID_H_ORI<0;1,0>:f                     { Compr }
mac (16) mfMSGPAYLOAD(0)<1>      fVIDEO_STEP_X<0;1,0>:f  SAMPLER_RAMP(0)     { Compr }

// Constants consumed by the `line` instructions in the per-row loop.
mov (1)  SCALE_LINE_P255<1>:f    255.0:f         { NoDDClr }     // was { NoDDClr, NoDDChk }
mov (1)  SCALE_LINE_P0_5<1>:f    0.5:f           { NoDDChk }
//------------------------------------------------------------------------------
// One iteration per output row (%1 = row index).
$for (0; <nY_NUM_OF_ROWS; 1) {
    // Sample 16 pixels per plane; results arrive as float32 in 8 GRFs,
    // ordered BGRA (VYUA).
    mov (8)   MSGHDR_SCALE<1>:ud        rMSGSRC<8;8,1>:ud       // copy msg header and payload mirrors to MRFs
    send (16) SCALE_RESPONSE_VW(0)<1>   MSGHDR_SCALE    udDUMMY_NULL    nSMPL_ENGINE    SMPLR_MSG_DSC+nSI_SRC_SIMD16_V+nBI_CURRENT_SRC_V
    send (16) SCALE_RESPONSE_YW(0)<1>   MSGHDR_SCALE    udDUMMY_NULL    nSMPL_ENGINE    SMPLR_MSG_DSC+nSI_SRC_SIMD16_Y+nBI_CURRENT_SRC_Y
    send (16) SCALE_RESPONSE_UW(0)<1>   MSGHDR_SCALE    udDUMMY_NULL    nSMPL_ENGINE    SMPLR_MSG_DSC+nSI_SRC_SIMD16_U+nBI_CURRENT_SRC_U

    // Step the 16 v coordinates to the next row. Both adds read the old
    // SCALE_COORD_Y, so the payload write must stay ahead of the in-place update.
    add (16)  mfMSGPAYLOAD(2)<1>        SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f     // next-row v straight into the payload
    add (16)  SCALE_COORD_Y<1>:f        SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f     // advance the saved copy

    // Scale back to [0, 255] and convert f to ud, one plane at a time.
    // NOTE(review): `line` presumably also adds SCALE_LINE_P0_5 as a rounding term - confirm against Scaling.inc.
    line (16) acc0:f                    SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_VF(0)    { Compr }   // B / V
    mov (16)  SCALE_RESPONSE_VD(0)<1>   acc0:f                                              { Compr }

    line (16) acc0:f                    SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(0)    { Compr }   // G / Y
    mov (16)  SCALE_RESPONSE_YD(0)<1>   acc0:f                                              { Compr }

    line (16) acc0:f                    SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_UF(0)    { Compr }   // R / U
    mov (16)  SCALE_RESPONSE_UD(0)<1>   acc0:f                                              { Compr }

    // Store row %1 of each plane through the byte view of the converted data.
    mov (16)  DEST_V(%1)<1>             SCALE_RESPONSE_VB(0)    // possible error due to truncation - vK
    mov (16)  DEST_Y(%1)<1>             SCALE_RESPONSE_YB(0)    // possible error due to truncation - vK
    mov (16)  DEST_U(%1)<1>             SCALE_RESPONSE_UB(0)    // possible error due to truncation - vK
}

#define nSRC_REGION nREGION_1
//------------------------------------------------------------------------------
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
#define DI_ENABLE

#include "DNDI.inc"

// Sampler response length: the DI-only build and the combined DN+DI build
// use different register counts.
#ifdef DI_ONLY
    #undef  nSMPL_RESP_LEN
    #define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DI                   // number of GRFs in the response
#else
    #undef  nSMPL_RESP_LEN
    #define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI                 // number of GRFs in the response
#endif

// Data-port write block sizes used by the save steps below.
#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // history write block (original note says 4x2 - TODO confirm against the macro encoding)
#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // denoised-frame write block, 16x4

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
#include "DNDI_Command.asm"
////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// (nothing to rearrange in this variant)

////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
// Build the write header in rMSGSRC, then send the STMM register to memory.
shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     NODDCLR_NODDCHK     // X origin / 2
mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  NODDCLR_NODDCHK     // Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         NODDCHK             // block width and height (8x4)

mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud                 // message header
mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)           // STMM payload into the MRF
send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud
#ifdef DI_ONLY
// DI-only build: no history / denoised-frame write-back is emitted.
#else

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////

// Header for the denoised-luma write.
mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w              NODDCLR             // X origin and Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud       NODDCLR_NODDCHK     // block width and height (16x4)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud

// Branch on field order: ubTFLD_FIRST == 1 selects the top-field-first path.
cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub      1:w
(f0.0) jmpi (1) TOP_FIELD_FIRST

// The payload holds four 4-dword rows. %1 walks the destination in steps of
// two rows and %2 walks the source in steps of one.
BOTTOM_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 0,2 <- 2nd-field luma of the current frame
}
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // payload lines 1,3 <- current-frame luma, upper half of each GRF
}
jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // payload lines 0,2 <- current-frame luma, lower half of each GRF
}
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 1,3 <- 2nd-field luma of the current frame
}

SAVE_DN_CURR:
send (8)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
#endif

// Save Processed frames
#include "DI_Save_PA.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
#define DI_ENABLE

#include "DNDI.inc"

// Block geometry for this kernel.
#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS          8       // Y rows per block (4 rows for each frame)
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS         8       // U/V rows per block

#undef  nSMPL_RESP_LEN
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI                 // number of GRFs in the response

// Data-port block sizes used by the save / copy steps below.
#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // history write block (original note says 4x2 - TODO confirm against the macro encoding)
#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // denoised-frame write block, 16x4
#undef  nDPR_BLOCK_SIZE_UV
#define nDPR_BLOCK_SIZE_UV      nBLOCK_WIDTH_8+nBLOCK_HEIGHT_4      // UV read/write block, 8x4

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
#include "DNDI_Command.asm"
////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// Widen the byte response (ubRESP) into the word-typed planar destinations.
// Previous frame goes to uwDEST_Y(0..3) / uwDEST_U(0) / uwDEST_V(0),
// current frame to uwDEST_Y(4..7) / uwDEST_U(2) / uwDEST_V(2).

// Previous frame, Y: one 16-pixel row per iteration.
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16)    uwDEST_Y(%1,0)<1>       ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
}

// Previous frame, U and V: de-interleave the chroma bytes (odd bytes -> U, even bytes -> V).
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8)     uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
    mov (8)     uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
}

// Current frame, Y: stored four registers after the previous-frame rows.
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16)    uwDEST_Y(%1+4,0)<1>     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
}

// Current frame, U and V: same de-interleave as above.
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8)     uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
    mov (8)     uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
}
////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
// Build the write header in rMSGSRC, then send the STMM register to memory.
shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin / 2
mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         // block width and height (8x4)
mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // STMM payload into the MRF
send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
// pCF_Y_OFFSET[0..3] = ubSRC_CF_OFFSET[0..3] + npDN_YUV
// NOTE(review): the consumer of pCF_Y_OFFSET is not visible in this file - confirm.
add (4)     pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub       npDN_YUV:w

// Branch on field order: ubTFLD_FIRST == 1 selects the top-field-first path.
cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub          1:w
(f0.0) jmpi (1) TOP_FIELD_FIRST

// The payload holds four 4-dword rows. %1 walks the destination in steps of
// two rows and %2 walks the source in steps of one.
BOTTOM_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 0,2 <- 2nd-field luma of the current frame
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // payload lines 1,3 <- current-frame luma, upper half of each GRF
}
jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // payload lines 0,2 <- current-frame luma, lower half of each GRF
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 1,3 <- 2nd-field luma of the current frame
}

// Both paths meet here: fill in the header and write the luma block.
SAVE_DN_CURR:
mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
send (8)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
///////////////////////////// NV11 UV Copy /////////////////////////////////////////////////////
// Chroma is passed through untouched: read one GRF of UV from the source
// surface and write it to the destination surface.

// Read UV through DATAPORT
add (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          wSRC_H_ORI_OFFSET<2;2,1>:w  // source Y block origin (X, Y) incl. source offset
asr (1)     rMSGSRC.0<1>:d          rMSGSRC.0<0;1,0>:d      1:w                         // halve X: U/V origin is half of Y's horizontally
mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud                               // U/V block width and height (nDPR_BLOCK_SIZE_UV, 8x4 in this file)
mov (8)     mudMSGHDR_DN<1>         rMSGSRC<8;8,1>:ud
send (8)    udBOT_U_IO(0)<1>    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_READ      nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud

// Write UV through DATAPORT
mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                                      // X origin and Y origin
asr (1)     rMSGSRC.0<1>:d          rMSGSRC.0<0;1,0>:d      1:w                         // halve X: U/V origin is half of Y's horizontally
mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud                               // block width and height (nDPR_BLOCK_SIZE_UV, 8x4 in this file)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
mov (8)     mudMSGHDR_DN(1)<1>      udBOT_U_IO(0)<8;8,1>                                // the GRF just read becomes the payload
send (8)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
\ No newline at end of file
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
#define DI_ENABLE

#include "DNDI.inc"

// Block geometry for this kernel.
#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS          8       // Y rows per block (4 rows for each frame)
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS         8       // U/V rows per block

#undef  nSMPL_RESP_LEN
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI                 // number of GRFs in the response

// Data-port block sizes used by the save / copy steps below.
#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // history write block (original note says 4x2 - TODO confirm against the macro encoding)
#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // denoised-frame write block, 16x4
#undef  nDPR_BLOCK_SIZE_UV
#define nDPR_BLOCK_SIZE_UV      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_2     // UV read/write block, 16x2

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
// The file name is spelled exactly as the sibling PL_DNDI_ALG* kernels spell
// it; an all-caps "DNDI_COMMAND.asm" does not resolve on case-sensitive file systems.
#include "DNDI_Command.asm"
////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// Widen the byte response (ubRESP) into the word-typed planar destinations.
// Previous frame goes to uwDEST_Y(0..3) / uwDEST_U(0) / uwDEST_V(0),
// current frame to uwDEST_Y(4..7) / uwDEST_U(2) / uwDEST_V(2).

// Previous frame, Y: one 16-pixel row per iteration.
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16)    uwDEST_Y(%1,0)<1>       ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
}

// Previous frame, U and V: de-interleave the chroma bytes (odd bytes -> U, even bytes -> V).
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8)     uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
    mov (8)     uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
}

// Current frame, Y: stored four registers after the previous-frame rows.
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16)    uwDEST_Y(%1+4,0)<1>     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
}

// Current frame, U and V: same de-interleave as above.
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8)     uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
    mov (8)     uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
}
////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
// Build the write header in rMSGSRC, then send the STMM register to memory.
shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin / 2
mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         // block width and height (8x4)
mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // STMM payload into the MRF
send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
// pCF_Y_OFFSET[0..3] = ubSRC_CF_OFFSET[0..3] + npDN_YUV
// NOTE(review): the consumer of pCF_Y_OFFSET is not visible in this file - confirm.
add (4)     pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub       npDN_YUV:w

// Branch on field order: ubTFLD_FIRST == 1 selects the top-field-first path.
cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub          1:w
(f0.0) jmpi (1) TOP_FIELD_FIRST

// The payload holds four 4-dword rows. %1 walks the destination in steps of
// two rows and %2 walks the source in steps of one.
BOTTOM_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 0,2 <- 2nd-field luma of the current frame
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // payload lines 1,3 <- current-frame luma, upper half of each GRF
}
jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // payload lines 0,2 <- current-frame luma, lower half of each GRF
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 1,3 <- 2nd-field luma of the current frame
}

// Both paths meet here: fill in the header and write the luma block.
SAVE_DN_CURR:
mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
send (8)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
///////////////////////////// NV12 UV Copy /////////////////////////////////////////////////////
// Chroma is passed through untouched: read one GRF of UV from the source
// surface and write it to the destination surface.

// Read UV through DATAPORT
add (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          wSRC_H_ORI_OFFSET<2;2,1>:w  // source Y block origin (X, Y) incl. source offset
asr (1)     rMSGSRC.1<1>:d          rMSGSRC.1<0;1,0>:d      1:w                         // halve Y: U/V origin is half of Y's vertically
mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud                               // U/V block width and height (16x2)
mov (8)     mudMSGHDR_DN<1>         rMSGSRC<8;8,1>:ud
send (8)    udBOT_U_IO(0)<1>    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_READ      nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud

// Write UV through DATAPORT
mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                                      // X origin and Y origin
asr (1)     rMSGSRC.1<1>:d          rMSGSRC.1<0;1,0>:d      1:w                         // halve Y: U/V origin is half of Y's vertically
mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud                               // block width and height (16x2)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
mov (8)     mudMSGHDR_DN(1)<1>      udBOT_U_IO(0)<8;8,1>                                // the GRF just read becomes the payload
send (8)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
\ No newline at end of file
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
#define DI_ENABLE

#include "DNDI.inc"

// Block geometry for this kernel.
#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS          8       // Y rows per block (4 rows for each frame)
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS         8       // U/V rows per block

#undef  nSMPL_RESP_LEN
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI                 // number of GRFs in the response

// Data-port block sizes used by the save / copy steps below.
#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // history write block (original note says 4x2 - TODO confirm against the macro encoding)
#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // denoised-frame write block, 16x4

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
#include "DNDI_Command.asm"
////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// Widen the byte response (ubRESP) into the word-typed planar destinations.
// Previous frame goes to uwDEST_Y(0..3) / uwDEST_U(0) / uwDEST_V(0),
// current frame to uwDEST_Y(4..7) / uwDEST_U(2) / uwDEST_V(2).

// Previous frame, Y: one 16-pixel row per iteration.
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16)    uwDEST_Y(%1,0)<1>       ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
}

// Previous frame, U and V: de-interleave the chroma bytes (odd bytes -> U, even bytes -> V).
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8)     uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
    mov (8)     uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
}

// Current frame, Y: stored four registers after the previous-frame rows.
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16)    uwDEST_Y(%1+4,0)<1>     ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
}

// Current frame, U and V: same de-interleave as above.
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8)     uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    // U pixels
    mov (8)     uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      // V pixels
}
////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
// Build the write header in rMSGSRC, then send the STMM register to memory.
shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin / 2
mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         // block width and height (8x4)
mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // STMM payload into the MRF
send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
// pCF_Y_OFFSET[0..3] = ubSRC_CF_OFFSET[0..3] + npDN_YUV
// NOTE(review): the consumer of pCF_Y_OFFSET is not visible in this file - confirm.
add (4)     pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub       npDN_YUV:w

// Branch on field order: ubTFLD_FIRST == 1 selects the top-field-first path.
cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub          1:w
(f0.0) jmpi (1) TOP_FIELD_FIRST

// The payload holds four 4-dword rows. %1 walks the destination in steps of
// two rows and %2 walks the source in steps of one.
BOTTOM_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 0,2 <- 2nd-field luma of the current frame
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // payload lines 1,3 <- current-frame luma, upper half of each GRF
}
jmpi (1) SAVE_DN_CURR

TOP_FIELD_FIRST:
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // payload lines 0,2 <- current-frame luma, lower half of each GRF
    mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // payload lines 1,3 <- 2nd-field luma of the current frame
}

// Both paths meet here: fill in the header and write the luma block.
SAVE_DN_CURR:
mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud
send (8)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
/////////////////////////////
P208
UV
Copy
422
/////////////////////////////////////////////////////
//
Read
UV
through
DATAPORT
add
(
2
)
rMSGSRC.0
<
1
>
:
d
wORIX
<
2
;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w // Source Y Block origin
mov
(
1
)
rMSGSRC.2
<
1
>
:
ud
nDPW_BLOCK_SIZE_DN
:
ud
//
Y
Bl
ock
width
and
height
(
16
x4
)
(
U
/
V
bl
ock
si
ze
is
the
same
)
mov
(
8
)
mudMSGHDR_DN
<
1
>
rMSGSRC
<
8
;8,1>:ud
send
(
8
)
udBOT_U_IO
(
0
)
<
1
>
mMSGHDR_DN
udDUMMY_NULL
nDATAPORT_READ
nDPMR_MSGDSC
+
nRESLEN_2
+
nBI_CURRENT_SRC_UV
:
ud
//
Write
UV
through
DATAPORT
mov
(
2
)
rMSGSRC.0
<
1
>
:
ud
wORIX
<
2
;2,1>:w // X origin and Y origin
mov
(
1
)
rMSGSRC.2
<
1
>
:
ud
nDPW_BLOCK_SIZE_DN
:
ud
//
bl
ock
width
and
height
(
16
x4
)
mov
(
8
)
mudMSGHDR_DN
(
0
)
<
1
>
rMSGSRC.0
<
8
;8,1>:ud
mov
(
8
)
mudMSGHDR_DN
(
1
)
<
1
>
udBOT_U_IO
(
0
)
<
8
;8,1>
mov
(
8
)
mudMSGHDR_DN
(
2
)
<
1
>
udBOT_U_IO
(
1
)
<
8
;8,1>
send
(
8
)
dNULLREG
mMSGHDR_DN
udDUMMY_NULL
nDATAPORT_WRITE
nDPMW_MSGDSC
+
nDPMW_MSG_LEN_PL_DN_DI
+
nBI_DESTINATION_UV
:
ud
\ No newline at end of file
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Kernel configuration for the planar DN+DI path with PL3 (separate U and V
// plane) chroma copy.  Each tunable is #undef'd first and then re-defined, so
// the values below replace whatever an earlier definition set.

#define DI_ENABLE                       // must be defined before DNDI.inc is included

#include "DNDI.inc"

#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS          8       // Number of Y rows per block (4 rows for each frame)
#undef  nUV_NUM_OF_ROWS
#define nUV_NUM_OF_ROWS         8       // Number of U/V rows per block

#undef  nSMPL_RESP_LEN
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI     // set the number of GRF

#undef  nDPW_BLOCK_SIZE_HIST
// NOTE(review): the original comment said "4x2", but the macros used here are
// WIDTH_4 + HEIGHT_1, i.e. 4x1.  Confirm which of the two is intended.
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1      // HIST Block Size for Write is 4x1

#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4     // DN Block Size for Write is 16x4

#undef  nDPR_BLOCK_SIZE_UV
#define nDPR_BLOCK_SIZE_UV      nBLOCK_WIDTH_8+nBLOCK_HEIGHT_2      // DN Block Size for UV Write/Read is 8x2

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
#include "DNDI_Command.asm"
////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// Copies the previous-frame and current-frame pixels out of the response
// registers (ubRESP, byte view) into the word-typed destination planes
// uwDEST_Y / uwDEST_U / uwDEST_V.  Each $for body is emitted once per
// iteration with %1 replaced by the loop counter.

// move the previous frame Y component to internal planar format
// (one 16-pixel row per iteration, rows 0..nY_NUM_OF_ROWS/2-1 of uwDEST_Y)
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16) uwDEST_Y(%1,0)<1>      ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
}

// move the previous frame U,V components to internal planar format
// (source stride 2 de-interleaves the chroma bytes: odd bytes -> U, even bytes -> V)
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8) uwDEST_U(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    //U pixels
    mov (8) uwDEST_V(0,%1*8)<1>     ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      //V pixels
}

// move the current frame Y component to internal planar format
// (destination rows are offset by 4, i.e. placed after the previous-frame rows)
$for (0; <nY_NUM_OF_ROWS/2; 1) {
    mov (16) uwDEST_Y(%1+4,0)<1>    ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
}

// move the current frame U,V components to internal planar format
// (same de-interleave as above, written to register 2 of each chroma plane)
$for (0; <nUV_NUM_OF_ROWS/2; 1) {
    mov (8) uwDEST_U(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>    //U pixels
    mov (8) uwDEST_V(2,%1*8)<1>     ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>      //V pixels
}
////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
// Builds a data-port write message in rMSGSRC (origin + block size), copies
// it and the STMM payload into the message registers, and sends it to the
// STMM/history output surface.

// Write STMM to memory
shr (1)     rMSGSRC.0<1>:ud         wORIX<0;1,0>:w          1:w     // X origin / 2
mov (1)     rMSGSRC.1<1>:ud         wORIY<0;1,0>:w                  // Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_STMM:ud         // block width and height (8x4)
mov (8)     mudMSGHDR_STMM(0)<1>    rMSGSRC.0<8;8,1>:ud             // message header
mov (8)     mudMSGHDR_STMM(1)<1>    udRESP(nDI_STMM_OFFSET,0)       // Move STMM to MRF
// Destination is the null register: no response payload is kept from this write.
send (8)    dNULLREG    mMSGHDR_STMM    udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud
////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DI_Hist_Save.asm"

////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
// Re-interleaves the two luma fields of the current frame into the
// message payload (mudMSGHDR_DN registers 1..) in display-line order, then
// writes the 16x4 block to the destination Y surface.  The order in which the
// two field sources are woven depends on ubTFLD_FIRST.

add (4)     pCF_Y_OFFSET<1>:uw      ubSRC_CF_OFFSET<4;4,1>:ub       npDN_YUV:w

// check top/bottom field first
cmp.e.f0.0 (1)  null<1>:w           ubTFLD_FIRST<0;1,0>:ub          1:w     // f0.0 set when ubTFLD_FIRST == 1
(f0.0) jmpi (1) TOP_FIELD_FIRST

BOTTOM_FIELD_FIRST:
// $for with two counters: %1 advances by 2 and %2 by 1 per iteration.
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4) mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 0,2)
    mov (4) mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1>      // 1st field luma from current frame (line 1,3)
}
jmpi (1)    SAVE_DN_CURR

TOP_FIELD_FIRST:
// NOTE(review): the original comments in this branch were copied from the
// BOTTOM_FIELD_FIRST branch and named the fields the other way round; they are
// corrected here to match the operands.  Please confirm.
$for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
    mov (4) mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1>      // 1st field luma from current frame (line 0,2)
    mov (4) mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1>  // 2nd field luma from current frame (line 1,3)
}

SAVE_DN_CURR:
mov (2)     rMSGSRC.0<1>:ud         wORIX<2;2,1>:w                  // X origin and Y origin
mov (1)     rMSGSRC.2<1>:ud         nDPW_BLOCK_SIZE_DN:ud           // block width and height (16x4)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud             // message header
send (8)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
///////////////////////////// IMC3 UV Copy 422 /////////////////////////////////////////////////////
// Copies the chroma block unchanged from the source U and V planes to the
// destination U and V planes: one data-port read per plane, then one
// data-port write per plane.
// NOTE(review): both X and Y origins are halved and the chroma block is 8x2
// against a 16x4 luma block, which looks like 4:2:0 rather than the "422" in
// the heading (presumably inherited from the P208 variant) -- confirm.

// Read UV through DATAPORT
add (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          wSRC_H_ORI_OFFSET<2;2,1>:w  // Source Y Block origin
asr (2)     rMSGSRC.0<1>:d          rMSGSRC.0<2;2,1>:d      1:w                         // U/V block origin should be half of Y's
mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud                               // U/V block width and height (8x2)
mov (8)     mudMSGHDR_DN<1>         rMSGSRC<8;8,1>:ud                                   // message header
// The same header is reused for both planes; only the binding index differs.
send (4)    udBOT_U_IO(0)<1>    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_U:ud
send (4)    udBOT_V_IO(0)<1>    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_READ  nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_V:ud

// Write UV through DATAPORT
// The destination origin is wORIX/wORIY halved (no source offset applied).
// A former "mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w" ahead of this asr was a
// dead store -- the asr reads wORIX directly and overwrites the same two
// dwords -- so it has been dropped.
asr (2)     rMSGSRC.0<1>:d          wORIX<2;2,1>:w          1:w                         // U/V block origin should be half of Y's
mov (1)     rMSGSRC.2<1>:ud         nDPR_BLOCK_SIZE_UV:ud                               // block width and height (8x2)
mov (8)     mudMSGHDR_DN(0)<1>      rMSGSRC.0<8;8,1>:ud                                 // message header

mov (4)     mudMSGHDR_DN(1)<1>      udBOT_U_IO(0)<4;4,1>                                // U payload
send (4)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_U:ud

mov (4)     mudMSGHDR_DN(1)<1>      udBOT_V_IO(0)<4;4,1>                                // V payload (header in register 0 is reused)
send (4)    dNULLREG    mMSGHDR_DN  udDUMMY_NULL    nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_V:ud
\ No newline at end of file
i965_drv_video/shaders/post_processing/Core_Kernels/PL_DN_ALG.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
// Denoise-only (no deinterlace) kernel for planar input: runs the shared DNDI
// command sequence, copies the denoised luma rows to the internal planar
// destination, then saves the history data for the next run.

#define DI_DISABLE                      // must be defined before DNDI.inc is included

#include "DNDI.inc"

#undef  nY_NUM_OF_ROWS
#define nY_NUM_OF_ROWS          8       // Number of Y rows per block

#undef  nSMPL_RESP_LEN
#define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DN_PL    // Set the Number of GRFs in DNDI response

#undef  nDPW_BLOCK_SIZE_DN
#define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_8     // DN Curr Block Size for Write is 16x8

#undef  nDPW_BLOCK_SIZE_HIST
#define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_2      // HIST Block Size for Write is 4x2

////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
// File name spelled as the sibling DNDI kernels spell it ("DNDI_Command.asm");
// the previous all-caps spelling only resolves on case-insensitive file systems.
#include "DNDI_Command.asm"

////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
// One 16-pixel row per iteration; %1 is the loop counter substituted by $for.
$for (0; <nY_NUM_OF_ROWS; 1) {
    mov (16) uwDEST_Y(0,%1*16)<1>   ubRESP(nNODI_LUMA_OFFSET,%1*16)<16;16,1>    // copy line of Y
}

////////////////////////////////////// Save the History Data for Next Run /////////////////////////
#include "DNDI_Hist_Save.asm"
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- RGB_AVS_IEF_16x8.asm ----------
// Issues two sampler messages, one per 8x8 half of the 16x8 block, with all
// four channels enabled.  The first response lands in uwAVS_RESPONSE and the
// second in uwAVS_RESPONSE_2.
#include "AVS_IEF.inc"

//------------------------------------------------------------------------------
// 2 sampler reads for 8x8 ARGB packed
//------------------------------------------------------------------------------

    // 1st 8x8 setup
    #include "AVS_SetupFirstBlock.asm"

    mov (1)  rAVS_8x8_HDR.2:ud      nAVS_ALL_CHANNELS:ud            // Enable ARGB channels
    mov (16) mAVS_8x8_HDR.0:ud      rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1) uwAVS_RESPONSE(0)<1>   mAVS_8x8_HDR    udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_4CH+nSI_SRC_RGB+nBI_CURRENT_SRC_YUV
    // Return ARGB in 16 GRFs

    // 2nd 8x8 setup
    // The channel-enable dword is not rewritten here; presumably the second
    // setup include leaves rAVS_8x8_HDR.2 intact -- TODO confirm.
    #include "AVS_SetupSecondBlock.asm"

    mov (16) mAVS_8x8_HDR_2.0:ud    rAVS_8x8_HDR.0<8;8,1>:ud        // Copy msg header and payload mirrors to MRFs
    send (1) uwAVS_RESPONSE_2(0)<1> mAVS_8x8_HDR_2  udDUMMY_NULL    nSMPL_ENGINE    nAVS_MSG_DSC_4CH+nSI_SRC_RGB+nBI_CURRENT_SRC_YUV
    // Return ARGB in 16 GRFs
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- RGB_AVS_IEF_Unpack_16x8.asm ----------
// Repacks the two planar sampler responses (uwAVS_RESPONSE for the first 8x8,
// uwAVS_RESPONSE_2 for the second 8x8) into interleaved 4-channel pixels in
// DEST_Y.  Every move writes with destination stride <4>, and the destination
// sub-register offset selects the channel slot: 0 = R, 1 = G, 2 = B, 3 = A.
//
// Response register layout used below (same for both responses):
//   R: registers 0, 1, 8, 9      G: registers 2, 3, 10, 11
//   B: registers 4, 5, 12, 13    A: registers 6, 7, 14, 15
#include "AVS_IEF.inc"

#ifdef AVS_OUTPUT_16_BIT
// 16-bit output: 4 words per move.  Within each response register the
// 4-word groups are consumed in the order 0, 8, 4, 12.
// First-8x8 data goes to DEST_Y registers 0,1,4,5,8,9,...; second-8x8 data
// fills the gaps (2,3,6,7,10,11,...).

    // Move first 8x8 words of B to dest GRF (as packed)
    mov (4) uwDEST_Y(0,2)<4>        uwAVS_RESPONSE(4,0)<4;4,1>
    mov (4) uwDEST_Y(1,2)<4>        uwAVS_RESPONSE(4,8)<4;4,1>
    mov (4) uwDEST_Y(4,2)<4>        uwAVS_RESPONSE(4,4)<4;4,1>
    mov (4) uwDEST_Y(5,2)<4>        uwAVS_RESPONSE(4,12)<4;4,1>
    mov (4) uwDEST_Y(8,2)<4>        uwAVS_RESPONSE(5,0)<4;4,1>
    mov (4) uwDEST_Y(9,2)<4>        uwAVS_RESPONSE(5,8)<4;4,1>
    mov (4) uwDEST_Y(12,2)<4>       uwAVS_RESPONSE(5,4)<4;4,1>
    mov (4) uwDEST_Y(13,2)<4>       uwAVS_RESPONSE(5,12)<4;4,1>
    mov (4) uwDEST_Y(16,2)<4>       uwAVS_RESPONSE(12,0)<4;4,1>
    mov (4) uwDEST_Y(17,2)<4>       uwAVS_RESPONSE(12,8)<4;4,1>
    mov (4) uwDEST_Y(20,2)<4>       uwAVS_RESPONSE(12,4)<4;4,1>
    mov (4) uwDEST_Y(21,2)<4>       uwAVS_RESPONSE(12,12)<4;4,1>
    mov (4) uwDEST_Y(24,2)<4>       uwAVS_RESPONSE(13,0)<4;4,1>
    mov (4) uwDEST_Y(25,2)<4>       uwAVS_RESPONSE(13,8)<4;4,1>
    mov (4) uwDEST_Y(28,2)<4>       uwAVS_RESPONSE(13,4)<4;4,1>
    mov (4) uwDEST_Y(29,2)<4>       uwAVS_RESPONSE(13,12)<4;4,1>

    // Move first 8x8 words of G to dest GRF (as packed)
    mov (4) uwDEST_Y(0,1)<4>        uwAVS_RESPONSE(2,0)<4;4,1>
    mov (4) uwDEST_Y(1,1)<4>        uwAVS_RESPONSE(2,8)<4;4,1>
    mov (4) uwDEST_Y(4,1)<4>        uwAVS_RESPONSE(2,4)<4;4,1>
    mov (4) uwDEST_Y(5,1)<4>        uwAVS_RESPONSE(2,12)<4;4,1>
    mov (4) uwDEST_Y(8,1)<4>        uwAVS_RESPONSE(3,0)<4;4,1>
    mov (4) uwDEST_Y(9,1)<4>        uwAVS_RESPONSE(3,8)<4;4,1>
    mov (4) uwDEST_Y(12,1)<4>       uwAVS_RESPONSE(3,4)<4;4,1>
    mov (4) uwDEST_Y(13,1)<4>       uwAVS_RESPONSE(3,12)<4;4,1>
    mov (4) uwDEST_Y(16,1)<4>       uwAVS_RESPONSE(10,0)<4;4,1>
    mov (4) uwDEST_Y(17,1)<4>       uwAVS_RESPONSE(10,8)<4;4,1>
    mov (4) uwDEST_Y(20,1)<4>       uwAVS_RESPONSE(10,4)<4;4,1>
    mov (4) uwDEST_Y(21,1)<4>       uwAVS_RESPONSE(10,12)<4;4,1>
    mov (4) uwDEST_Y(24,1)<4>       uwAVS_RESPONSE(11,0)<4;4,1>
    mov (4) uwDEST_Y(25,1)<4>       uwAVS_RESPONSE(11,8)<4;4,1>
    mov (4) uwDEST_Y(28,1)<4>       uwAVS_RESPONSE(11,4)<4;4,1>
    mov (4) uwDEST_Y(29,1)<4>       uwAVS_RESPONSE(11,12)<4;4,1>

    // Move first 8x8 words of R to dest GRF (as packed)
    mov (4) uwDEST_Y(0,0)<4>        uwAVS_RESPONSE(0,0)<4;4,1>
    mov (4) uwDEST_Y(1,0)<4>        uwAVS_RESPONSE(0,8)<4;4,1>
    mov (4) uwDEST_Y(4,0)<4>        uwAVS_RESPONSE(0,4)<4;4,1>
    mov (4) uwDEST_Y(5,0)<4>        uwAVS_RESPONSE(0,12)<4;4,1>
    mov (4) uwDEST_Y(8,0)<4>        uwAVS_RESPONSE(1,0)<4;4,1>
    mov (4) uwDEST_Y(9,0)<4>        uwAVS_RESPONSE(1,8)<4;4,1>
    mov (4) uwDEST_Y(12,0)<4>       uwAVS_RESPONSE(1,4)<4;4,1>
    mov (4) uwDEST_Y(13,0)<4>       uwAVS_RESPONSE(1,12)<4;4,1>
    mov (4) uwDEST_Y(16,0)<4>       uwAVS_RESPONSE(8,0)<4;4,1>
    mov (4) uwDEST_Y(17,0)<4>       uwAVS_RESPONSE(8,8)<4;4,1>
    mov (4) uwDEST_Y(20,0)<4>       uwAVS_RESPONSE(8,4)<4;4,1>
    mov (4) uwDEST_Y(21,0)<4>       uwAVS_RESPONSE(8,12)<4;4,1>
    mov (4) uwDEST_Y(24,0)<4>       uwAVS_RESPONSE(9,0)<4;4,1>
    mov (4) uwDEST_Y(25,0)<4>       uwAVS_RESPONSE(9,8)<4;4,1>
    mov (4) uwDEST_Y(28,0)<4>       uwAVS_RESPONSE(9,4)<4;4,1>
    mov (4) uwDEST_Y(29,0)<4>       uwAVS_RESPONSE(9,12)<4;4,1>

    // Move first 8x8 words of A to dest GRF (as packed)
    mov (4) uwDEST_Y(0,3)<4>        uwAVS_RESPONSE(6,0)<4;4,1>
    mov (4) uwDEST_Y(1,3)<4>        uwAVS_RESPONSE(6,8)<4;4,1>
    mov (4) uwDEST_Y(4,3)<4>        uwAVS_RESPONSE(6,4)<4;4,1>
    mov (4) uwDEST_Y(5,3)<4>        uwAVS_RESPONSE(6,12)<4;4,1>
    mov (4) uwDEST_Y(8,3)<4>        uwAVS_RESPONSE(7,0)<4;4,1>
    mov (4) uwDEST_Y(9,3)<4>        uwAVS_RESPONSE(7,8)<4;4,1>
    mov (4) uwDEST_Y(12,3)<4>       uwAVS_RESPONSE(7,4)<4;4,1>
    mov (4) uwDEST_Y(13,3)<4>       uwAVS_RESPONSE(7,12)<4;4,1>
    mov (4) uwDEST_Y(16,3)<4>       uwAVS_RESPONSE(14,0)<4;4,1>
    mov (4) uwDEST_Y(17,3)<4>       uwAVS_RESPONSE(14,8)<4;4,1>
    mov (4) uwDEST_Y(20,3)<4>       uwAVS_RESPONSE(14,4)<4;4,1>
    mov (4) uwDEST_Y(21,3)<4>       uwAVS_RESPONSE(14,12)<4;4,1>
    mov (4) uwDEST_Y(24,3)<4>       uwAVS_RESPONSE(15,0)<4;4,1>
    mov (4) uwDEST_Y(25,3)<4>       uwAVS_RESPONSE(15,8)<4;4,1>
    mov (4) uwDEST_Y(28,3)<4>       uwAVS_RESPONSE(15,4)<4;4,1>
    mov (4) uwDEST_Y(29,3)<4>       uwAVS_RESPONSE(15,12)<4;4,1>

    // Move second 8x8 words of B to dest GRF
    mov (4) uwDEST_Y(2,2)<4>        uwAVS_RESPONSE_2(4,0)<4;4,1>
    mov (4) uwDEST_Y(3,2)<4>        uwAVS_RESPONSE_2(4,8)<4;4,1>
    mov (4) uwDEST_Y(6,2)<4>        uwAVS_RESPONSE_2(4,4)<4;4,1>
    mov (4) uwDEST_Y(7,2)<4>        uwAVS_RESPONSE_2(4,12)<4;4,1>
    mov (4) uwDEST_Y(10,2)<4>       uwAVS_RESPONSE_2(5,0)<4;4,1>
    mov (4) uwDEST_Y(11,2)<4>       uwAVS_RESPONSE_2(5,8)<4;4,1>
    mov (4) uwDEST_Y(14,2)<4>       uwAVS_RESPONSE_2(5,4)<4;4,1>
    mov (4) uwDEST_Y(15,2)<4>       uwAVS_RESPONSE_2(5,12)<4;4,1>
    mov (4) uwDEST_Y(18,2)<4>       uwAVS_RESPONSE_2(12,0)<4;4,1>
    mov (4) uwDEST_Y(19,2)<4>       uwAVS_RESPONSE_2(12,8)<4;4,1>
    mov (4) uwDEST_Y(22,2)<4>       uwAVS_RESPONSE_2(12,4)<4;4,1>
    mov (4) uwDEST_Y(23,2)<4>       uwAVS_RESPONSE_2(12,12)<4;4,1>
    mov (4) uwDEST_Y(26,2)<4>       uwAVS_RESPONSE_2(13,0)<4;4,1>
    mov (4) uwDEST_Y(27,2)<4>       uwAVS_RESPONSE_2(13,8)<4;4,1>
    mov (4) uwDEST_Y(30,2)<4>       uwAVS_RESPONSE_2(13,4)<4;4,1>
    mov (4) uwDEST_Y(31,2)<4>       uwAVS_RESPONSE_2(13,12)<4;4,1>

    // Move second 8x8 words of G to dest GRF
    mov (4) uwDEST_Y(2,1)<4>        uwAVS_RESPONSE_2(2,0)<4;4,1>
    mov (4) uwDEST_Y(3,1)<4>        uwAVS_RESPONSE_2(2,8)<4;4,1>
    mov (4) uwDEST_Y(6,1)<4>        uwAVS_RESPONSE_2(2,4)<4;4,1>
    mov (4) uwDEST_Y(7,1)<4>        uwAVS_RESPONSE_2(2,12)<4;4,1>
    mov (4) uwDEST_Y(10,1)<4>       uwAVS_RESPONSE_2(3,0)<4;4,1>
    mov (4) uwDEST_Y(11,1)<4>       uwAVS_RESPONSE_2(3,8)<4;4,1>
    mov (4) uwDEST_Y(14,1)<4>       uwAVS_RESPONSE_2(3,4)<4;4,1>
    mov (4) uwDEST_Y(15,1)<4>       uwAVS_RESPONSE_2(3,12)<4;4,1>
    mov (4) uwDEST_Y(18,1)<4>       uwAVS_RESPONSE_2(10,0)<4;4,1>
    mov (4) uwDEST_Y(19,1)<4>       uwAVS_RESPONSE_2(10,8)<4;4,1>
    mov (4) uwDEST_Y(22,1)<4>       uwAVS_RESPONSE_2(10,4)<4;4,1>
    mov (4) uwDEST_Y(23,1)<4>       uwAVS_RESPONSE_2(10,12)<4;4,1>
    mov (4) uwDEST_Y(26,1)<4>       uwAVS_RESPONSE_2(11,0)<4;4,1>
    mov (4) uwDEST_Y(27,1)<4>       uwAVS_RESPONSE_2(11,8)<4;4,1>
    mov (4) uwDEST_Y(30,1)<4>       uwAVS_RESPONSE_2(11,4)<4;4,1>
    mov (4) uwDEST_Y(31,1)<4>       uwAVS_RESPONSE_2(11,12)<4;4,1>

    // Move second 8x8 words of R to dest GRF
    mov (4) uwDEST_Y(2,0)<4>        uwAVS_RESPONSE_2(0,0)<4;4,1>
    mov (4) uwDEST_Y(3,0)<4>        uwAVS_RESPONSE_2(0,8)<4;4,1>
    mov (4) uwDEST_Y(6,0)<4>        uwAVS_RESPONSE_2(0,4)<4;4,1>
    mov (4) uwDEST_Y(7,0)<4>        uwAVS_RESPONSE_2(0,12)<4;4,1>
    mov (4) uwDEST_Y(10,0)<4>       uwAVS_RESPONSE_2(1,0)<4;4,1>
    mov (4) uwDEST_Y(11,0)<4>       uwAVS_RESPONSE_2(1,8)<4;4,1>
    mov (4) uwDEST_Y(14,0)<4>       uwAVS_RESPONSE_2(1,4)<4;4,1>
    mov (4) uwDEST_Y(15,0)<4>       uwAVS_RESPONSE_2(1,12)<4;4,1>
    mov (4) uwDEST_Y(18,0)<4>       uwAVS_RESPONSE_2(8,0)<4;4,1>
    mov (4) uwDEST_Y(19,0)<4>       uwAVS_RESPONSE_2(8,8)<4;4,1>
    mov (4) uwDEST_Y(22,0)<4>       uwAVS_RESPONSE_2(8,4)<4;4,1>
    mov (4) uwDEST_Y(23,0)<4>       uwAVS_RESPONSE_2(8,12)<4;4,1>
    mov (4) uwDEST_Y(26,0)<4>       uwAVS_RESPONSE_2(9,0)<4;4,1>
    mov (4) uwDEST_Y(27,0)<4>       uwAVS_RESPONSE_2(9,8)<4;4,1>
    mov (4) uwDEST_Y(30,0)<4>       uwAVS_RESPONSE_2(9,4)<4;4,1>
    mov (4) uwDEST_Y(31,0)<4>       uwAVS_RESPONSE_2(9,12)<4;4,1>

    // Move second 8x8 words of A to dest GRF
    mov (4) uwDEST_Y(2,3)<4>        uwAVS_RESPONSE_2(6,0)<4;4,1>
    mov (4) uwDEST_Y(3,3)<4>        uwAVS_RESPONSE_2(6,8)<4;4,1>
    mov (4) uwDEST_Y(6,3)<4>        uwAVS_RESPONSE_2(6,4)<4;4,1>
    mov (4) uwDEST_Y(7,3)<4>        uwAVS_RESPONSE_2(6,12)<4;4,1>
    mov (4) uwDEST_Y(10,3)<4>       uwAVS_RESPONSE_2(7,0)<4;4,1>
    mov (4) uwDEST_Y(11,3)<4>       uwAVS_RESPONSE_2(7,8)<4;4,1>
    mov (4) uwDEST_Y(14,3)<4>       uwAVS_RESPONSE_2(7,4)<4;4,1>
    mov (4) uwDEST_Y(15,3)<4>       uwAVS_RESPONSE_2(7,12)<4;4,1>
    mov (4) uwDEST_Y(18,3)<4>       uwAVS_RESPONSE_2(14,0)<4;4,1>
    mov (4) uwDEST_Y(19,3)<4>       uwAVS_RESPONSE_2(14,8)<4;4,1>
    mov (4) uwDEST_Y(22,3)<4>       uwAVS_RESPONSE_2(14,4)<4;4,1>
    mov (4) uwDEST_Y(23,3)<4>       uwAVS_RESPONSE_2(14,12)<4;4,1>
    mov (4) uwDEST_Y(26,3)<4>       uwAVS_RESPONSE_2(15,0)<4;4,1>
    mov (4) uwDEST_Y(27,3)<4>       uwAVS_RESPONSE_2(15,8)<4;4,1>
    mov (4) uwDEST_Y(30,3)<4>       uwAVS_RESPONSE_2(15,4)<4;4,1>
    mov (4) uwDEST_Y(31,3)<4>       uwAVS_RESPONSE_2(15,12)<4;4,1>

#else /* OUTPUT_8_BIT */
// 8-bit output: 8 bytes per move, read through the byte view of the response
// with element stride 2 starting at an odd byte offset (1 or 8+1) --
// presumably the upper byte of each 16-bit sample; TODO confirm.
// First-8x8 data goes to even DEST_Y registers, second-8x8 data to odd ones.

    // Move first 8x8 words of B to dest GRF
    mov (8) ubDEST_Y(0,2)<4>        ubAVS_RESPONSE(4,1)<16;4,2>
    mov (8) ubDEST_Y(2,2)<4>        ubAVS_RESPONSE(4,8+1)<16;4,2>
    mov (8) ubDEST_Y(4,2)<4>        ubAVS_RESPONSE(5,1)<16;4,2>
    mov (8) ubDEST_Y(6,2)<4>        ubAVS_RESPONSE(5,8+1)<16;4,2>
    mov (8) ubDEST_Y(8,2)<4>        ubAVS_RESPONSE(12,1)<16;4,2>
    mov (8) ubDEST_Y(10,2)<4>       ubAVS_RESPONSE(12,8+1)<16;4,2>
    mov (8) ubDEST_Y(12,2)<4>       ubAVS_RESPONSE(13,1)<16;4,2>
    mov (8) ubDEST_Y(14,2)<4>       ubAVS_RESPONSE(13,8+1)<16;4,2>

    // Move first 8x8 words of G to dest GRF
    mov (8) ubDEST_Y(0,1)<4>        ubAVS_RESPONSE(2,1)<16;4,2>
    mov (8) ubDEST_Y(2,1)<4>        ubAVS_RESPONSE(2,8+1)<16;4,2>
    mov (8) ubDEST_Y(4,1)<4>        ubAVS_RESPONSE(3,1)<16;4,2>
    mov (8) ubDEST_Y(6,1)<4>        ubAVS_RESPONSE(3,8+1)<16;4,2>
    mov (8) ubDEST_Y(8,1)<4>        ubAVS_RESPONSE(10,1)<16;4,2>
    mov (8) ubDEST_Y(10,1)<4>       ubAVS_RESPONSE(10,8+1)<16;4,2>
    mov (8) ubDEST_Y(12,1)<4>       ubAVS_RESPONSE(11,1)<16;4,2>
    mov (8) ubDEST_Y(14,1)<4>       ubAVS_RESPONSE(11,8+1)<16;4,2>

    // Move first 8x8 words of R to dest GRF
    mov (8) ubDEST_Y(0,0)<4>        ubAVS_RESPONSE(0,1)<16;4,2>
    mov (8) ubDEST_Y(2,0)<4>        ubAVS_RESPONSE(0,8+1)<16;4,2>
    mov (8) ubDEST_Y(4,0)<4>        ubAVS_RESPONSE(1,1)<16;4,2>
    mov (8) ubDEST_Y(6,0)<4>        ubAVS_RESPONSE(1,8+1)<16;4,2>
    mov (8) ubDEST_Y(8,0)<4>        ubAVS_RESPONSE(8,1)<16;4,2>
    mov (8) ubDEST_Y(10,0)<4>       ubAVS_RESPONSE(8,8+1)<16;4,2>
    mov (8) ubDEST_Y(12,0)<4>       ubAVS_RESPONSE(9,1)<16;4,2>
    mov (8) ubDEST_Y(14,0)<4>       ubAVS_RESPONSE(9,8+1)<16;4,2>

    // Move first 8x8 words of A to dest GRF
    mov (8) ubDEST_Y(0,3)<4>        ubAVS_RESPONSE(6,1)<16;4,2>
    mov (8) ubDEST_Y(2,3)<4>        ubAVS_RESPONSE(6,8+1)<16;4,2>
    mov (8) ubDEST_Y(4,3)<4>        ubAVS_RESPONSE(7,1)<16;4,2>
    mov (8) ubDEST_Y(6,3)<4>        ubAVS_RESPONSE(7,8+1)<16;4,2>
    mov (8) ubDEST_Y(8,3)<4>        ubAVS_RESPONSE(14,1)<16;4,2>
    mov (8) ubDEST_Y(10,3)<4>       ubAVS_RESPONSE(14,8+1)<16;4,2>
    mov (8) ubDEST_Y(12,3)<4>       ubAVS_RESPONSE(15,1)<16;4,2>
    mov (8) ubDEST_Y(14,3)<4>       ubAVS_RESPONSE(15,8+1)<16;4,2>

    // Move second 8x8 words of B to dest GRF
    mov (8) ubDEST_Y(1,2)<4>        ubAVS_RESPONSE_2(4,1)<16;4,2>
    mov (8) ubDEST_Y(3,2)<4>        ubAVS_RESPONSE_2(4,8+1)<16;4,2>
    mov (8) ubDEST_Y(5,2)<4>        ubAVS_RESPONSE_2(5,1)<16;4,2>
    mov (8) ubDEST_Y(7,2)<4>        ubAVS_RESPONSE_2(5,8+1)<16;4,2>
    mov (8) ubDEST_Y(9,2)<4>        ubAVS_RESPONSE_2(12,1)<16;4,2>
    mov (8) ubDEST_Y(11,2)<4>       ubAVS_RESPONSE_2(12,8+1)<16;4,2>
    mov (8) ubDEST_Y(13,2)<4>       ubAVS_RESPONSE_2(13,1)<16;4,2>
    mov (8) ubDEST_Y(15,2)<4>       ubAVS_RESPONSE_2(13,8+1)<16;4,2>

    // Move second 8x8 words of G to dest GRF
    mov (8) ubDEST_Y(1,1)<4>        ubAVS_RESPONSE_2(2,1)<16;4,2>
    mov (8) ubDEST_Y(3,1)<4>        ubAVS_RESPONSE_2(2,8+1)<16;4,2>
    mov (8) ubDEST_Y(5,1)<4>        ubAVS_RESPONSE_2(3,1)<16;4,2>
    mov (8) ubDEST_Y(7,1)<4>        ubAVS_RESPONSE_2(3,8+1)<16;4,2>
    mov (8) ubDEST_Y(9,1)<4>        ubAVS_RESPONSE_2(10,1)<16;4,2>
    mov (8) ubDEST_Y(11,1)<4>       ubAVS_RESPONSE_2(10,8+1)<16;4,2>
    mov (8) ubDEST_Y(13,1)<4>       ubAVS_RESPONSE_2(11,1)<16;4,2>
    mov (8) ubDEST_Y(15,1)<4>       ubAVS_RESPONSE_2(11,8+1)<16;4,2>

    // Move second 8x8 words of R to dest GRF
    mov (8) ubDEST_Y(1,0)<4>        ubAVS_RESPONSE_2(0,1)<16;4,2>
    mov (8) ubDEST_Y(3,0)<4>        ubAVS_RESPONSE_2(0,8+1)<16;4,2>
    mov (8) ubDEST_Y(5,0)<4>        ubAVS_RESPONSE_2(1,1)<16;4,2>
    mov (8) ubDEST_Y(7,0)<4>        ubAVS_RESPONSE_2(1,8+1)<16;4,2>
    mov (8) ubDEST_Y(9,0)<4>        ubAVS_RESPONSE_2(8,1)<16;4,2>
    mov (8) ubDEST_Y(11,0)<4>       ubAVS_RESPONSE_2(8,8+1)<16;4,2>
    mov (8) ubDEST_Y(13,0)<4>       ubAVS_RESPONSE_2(9,1)<16;4,2>
    mov (8) ubDEST_Y(15,0)<4>       ubAVS_RESPONSE_2(9,8+1)<16;4,2>

    // Move second 8x8 words of A to dest GRF
    mov (8) ubDEST_Y(1,3)<4>        ubAVS_RESPONSE_2(6,1)<16;4,2>
    mov (8) ubDEST_Y(3,3)<4>        ubAVS_RESPONSE_2(6,8+1)<16;4,2>
    mov (8) ubDEST_Y(5,3)<4>        ubAVS_RESPONSE_2(7,1)<16;4,2>
    mov (8) ubDEST_Y(7,3)<4>        ubAVS_RESPONSE_2(7,8+1)<16;4,2>
    mov (8) ubDEST_Y(9,3)<4>        ubAVS_RESPONSE_2(14,1)<16;4,2>
    mov (8) ubDEST_Y(11,3)<4>       ubAVS_RESPONSE_2(14,8+1)<16;4,2>
    mov (8) ubDEST_Y(13,3)<4>       ubAVS_RESPONSE_2(15,1)<16;4,2>
    mov (8) ubDEST_Y(15,3)<4>       ubAVS_RESPONSE_2(15,8+1)<16;4,2>
#endif

//------------------------------------------------------------------------------
// Set to write bottom region to memory
#define SRC_REGION  REGION_2

// Re-define new # of lines
// (both row counts are dropped first, then set to 8 for whatever is assembled after this file)
#undef nUV_NUM_OF_ROWS
#undef nY_NUM_OF_ROWS

#define nY_NUM_OF_ROWS      8
#define nUV_NUM_OF_ROWS     8
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//----------
RGB_AVS_IEF_Unscramble_16x8.asm
----------
#
include
"
AVS_IEF.inc
"
.declare
DEST_B
Base
=
REG
(
r
,
10
)
ElementSize
=
2
SrcRegion
=
REGION
(
8
,
1
)
Ds
tRegion
=
<
1
>
Type
=
uw
.declare
DEST_G
Base
=
REG
(
r
,
18
)
ElementSize
=
2
SrcRegion
=
REGION
(
8
,
1
)
Ds
tRegion
=
<
1
>
Type
=
uw
.declare
DEST_R
Base
=
REG
(
r
,
26
)
ElementSize
=
2
SrcRegion
=
REGION
(
8
,
1
)
Ds
tRegion
=
<
1
>
Type
=
uw
.declare
DEST_A
Base
=
REG
(
r
,
34
)
ElementSize
=
2
SrcRegion
=
REGION
(
8
,
1
)
Ds
tRegion
=
<
1
>
Type
=
uw
#
ifdef
AVS_OUTPUT_16_BIT
//
This
portion
will
need
to
be
ch
anged
if
unpacking
is
required
for
Y416
kernels
(
in
case
of
bl
ending
etc
)
-
vK
////
Move
first
8
x8
word
s
of
B
to
dest
GRF
(
as
packed
)
//
mov
(
4
)
uwDEST_Y
(
0
,
2
)
<
4
>
uwAVS_RESPONSE
(
4
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
1
,
2
)
<
4
>
uwAVS_RESPONSE
(
4
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
4
,
2
)
<
4
>
uwAVS_RESPONSE
(
4
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
5
,
2
)
<
4
>
uwAVS_RESPONSE
(
4
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
8
,
2
)
<
4
>
uwAVS_RESPONSE
(
5
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
9
,
2
)
<
4
>
uwAVS_RESPONSE
(
5
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
12
,
2
)
<
4
>
uwAVS_RESPONSE
(
5
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
13
,
2
)
<
4
>
uwAVS_RESPONSE
(
5
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
16
,
2
)
<
4
>
uwAVS_RESPONSE
(
12
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
17
,
2
)
<
4
>
uwAVS_RESPONSE
(
12
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
20
,
2
)
<
4
>
uwAVS_RESPONSE
(
12
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
21
,
2
)
<
4
>
uwAVS_RESPONSE
(
12
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
24
,
2
)
<
4
>
uwAVS_RESPONSE
(
13
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
25
,
2
)
<
4
>
uwAVS_RESPONSE
(
13
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
28
,
2
)
<
4
>
uwAVS_RESPONSE
(
13
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
29
,
2
)
<
4
>
uwAVS_RESPONSE
(
13
,
12
)
<
4
;4,1>
//
////
Move
first
8
x8
word
s
of
G
to
dest
GRF
(
as
packed
)
//
mov
(
4
)
uwDEST_Y
(
0
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
1
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
4
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
5
,
1
)
<
4
>
uwAVS_RESPONSE
(
2
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
8
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
9
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
12
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
13
,
1
)
<
4
>
uwAVS_RESPONSE
(
3
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
16
,
1
)
<
4
>
uwAVS_RESPONSE
(
10
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
17
,
1
)
<
4
>
uwAVS_RESPONSE
(
10
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
20
,
1
)
<
4
>
uwAVS_RESPONSE
(
10
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
21
,
1
)
<
4
>
uwAVS_RESPONSE
(
10
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
24
,
1
)
<
4
>
uwAVS_RESPONSE
(
11
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
25
,
1
)
<
4
>
uwAVS_RESPONSE
(
11
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
28
,
1
)
<
4
>
uwAVS_RESPONSE
(
11
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
29
,
1
)
<
4
>
uwAVS_RESPONSE
(
11
,
12
)
<
4
;4,1>
//
////
Move
first
8
x8
word
s
of
R
to
dest
GRF
(
as
packed
)
//
mov
(
4
)
uwDEST_Y
(
0
,
0
)
<
4
>
uwAVS_RESPONSE
(
0
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
1
,
0
)
<
4
>
uwAVS_RESPONSE
(
0
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
4
,
0
)
<
4
>
uwAVS_RESPONSE
(
0
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
5
,
0
)
<
4
>
uwAVS_RESPONSE
(
0
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
8
,
0
)
<
4
>
uwAVS_RESPONSE
(
1
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
9
,
0
)
<
4
>
uwAVS_RESPONSE
(
1
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
12
,
0
)
<
4
>
uwAVS_RESPONSE
(
1
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
13
,
0
)
<
4
>
uwAVS_RESPONSE
(
1
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
16
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
17
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
20
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
21
,
0
)
<
4
>
uwAVS_RESPONSE
(
8
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
24
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
25
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
28
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
29
,
0
)
<
4
>
uwAVS_RESPONSE
(
9
,
12
)
<
4
;4,1>
//
////
Move
first
8
x8
word
s
of
A
to
dest
GRF
(
as
packed
)
//
mov
(
4
)
uwDEST_Y
(
0
,
3
)
<
4
>
uwAVS_RESPONSE
(
6
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
1
,
3
)
<
4
>
uwAVS_RESPONSE
(
6
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
4
,
3
)
<
4
>
uwAVS_RESPONSE
(
6
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
5
,
3
)
<
4
>
uwAVS_RESPONSE
(
6
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
8
,
3
)
<
4
>
uwAVS_RESPONSE
(
7
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
9
,
3
)
<
4
>
uwAVS_RESPONSE
(
7
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
12
,
3
)
<
4
>
uwAVS_RESPONSE
(
7
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
13
,
3
)
<
4
>
uwAVS_RESPONSE
(
7
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
16
,
3
)
<
4
>
uwAVS_RESPONSE
(
14
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
17
,
3
)
<
4
>
uwAVS_RESPONSE
(
14
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
20
,
3
)
<
4
>
uwAVS_RESPONSE
(
14
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
21
,
3
)
<
4
>
uwAVS_RESPONSE
(
14
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
24
,
3
)
<
4
>
uwAVS_RESPONSE
(
15
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
25
,
3
)
<
4
>
uwAVS_RESPONSE
(
15
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
28
,
3
)
<
4
>
uwAVS_RESPONSE
(
15
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
29
,
3
)
<
4
>
uwAVS_RESPONSE
(
15
,
12
)
<
4
;4,1>
//
////
Move
second
8
x8
word
s
of
B
to
dest
GRF
//
mov
(
4
)
uwDEST_Y
(
2
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
3
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
6
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
7
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
4
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
10
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
11
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
14
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
15
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
5
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
18
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
12
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
19
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
12
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
22
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
12
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
23
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
12
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
26
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
13
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
27
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
13
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
30
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
13
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
31
,
2
)
<
4
>
uwAVS_RESPONSE_2
(
13
,
12
)
<
4
;4,1>
//
////
Move
second
8
x8
word
s
of
G
to
dest
GRF
//
mov
(
4
)
uwDEST_Y
(
2
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
3
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
6
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
7
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
2
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
10
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
11
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
14
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
15
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
3
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
18
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
19
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
22
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
23
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
10
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
26
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
27
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
30
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
31
,
1
)
<
4
>
uwAVS_RESPONSE_2
(
11
,
12
)
<
4
;4,1>
//
////
Move
second
8
x8
word
s
of
R
to
dest
GRF
//
mov
(
4
)
uwDEST_Y
(
2
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
3
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
6
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
7
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
0
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
10
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
11
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
14
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
15
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
1
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
18
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
19
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
22
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
23
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
8
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
26
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
27
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
30
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
31
,
0
)
<
4
>
uwAVS_RESPONSE_2
(
9
,
12
)
<
4
;4,1>
//
////
Move
second
8
x8
word
s
of
A
to
dest
GRF
//
mov
(
4
)
uwDEST_Y
(
2
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
3
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
6
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
7
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
6
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
10
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
11
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
14
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
15
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
7
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
18
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
14
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
19
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
14
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
22
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
14
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
23
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
14
,
12
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
26
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
15
,
0
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
27
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
15
,
8
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
30
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
15
,
4
)
<
4
;4,1>
//
mov
(
4
)
uwDEST_Y
(
31
,
3
)
<
4
>
uwAVS_RESPONSE_2
(
15
,
12
)
<
4
;4,1>
#
else
/*
OUTPUT_8_BIT
*/
//
Move
first
8
x8
word
s
of
B
to
dest
GRF
mov
(
8
)
DEST_B
(
0
)
<
1
>
ubAVS_RESPONSE
(
4
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
1
)
<
1
>
ubAVS_RESPONSE
(
4
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
2
)
<
1
>
ubAVS_RESPONSE
(
5
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
3
)
<
1
>
ubAVS_RESPONSE
(
5
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
4
)
<
1
>
ubAVS_RESPONSE
(
12
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
5
)
<
1
>
ubAVS_RESPONSE
(
12
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
6
)
<
1
>
ubAVS_RESPONSE
(
13
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
7
)
<
1
>
ubAVS_RESPONSE
(
13
,
8
+
1
)
<
16
;4,2>
//
Move
first
8
x8
word
s
of
G
to
dest
GRF
mov
(
8
)
DEST_G
(
0
)
<
1
>
ubAVS_RESPONSE
(
2
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
1
)
<
1
>
ubAVS_RESPONSE
(
2
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
2
)
<
1
>
ubAVS_RESPONSE
(
3
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
3
)
<
1
>
ubAVS_RESPONSE
(
3
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
4
)
<
1
>
ubAVS_RESPONSE
(
10
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
5
)
<
1
>
ubAVS_RESPONSE
(
10
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
6
)
<
1
>
ubAVS_RESPONSE
(
11
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
7
)
<
1
>
ubAVS_RESPONSE
(
11
,
8
+
1
)
<
16
;4,2>
//
Move
first
8
x8
word
s
of
R
to
dest
GRF
mov
(
8
)
DEST_R
(
0
)
<
1
>
ubAVS_RESPONSE
(
0
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
1
)
<
1
>
ubAVS_RESPONSE
(
0
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
2
)
<
1
>
ubAVS_RESPONSE
(
1
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
3
)
<
1
>
ubAVS_RESPONSE
(
1
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
4
)
<
1
>
ubAVS_RESPONSE
(
8
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
5
)
<
1
>
ubAVS_RESPONSE
(
8
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
6
)
<
1
>
ubAVS_RESPONSE
(
9
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
7
)
<
1
>
ubAVS_RESPONSE
(
9
,
8
+
1
)
<
16
;4,2>
//
Move
first
8
x8
word
s
of
A
to
dest
GRF
mov
(
8
)
DEST_A
(
0
)
<
1
>
ubAVS_RESPONSE
(
6
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
1
)
<
1
>
ubAVS_RESPONSE
(
6
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
2
)
<
1
>
ubAVS_RESPONSE
(
7
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
3
)
<
1
>
ubAVS_RESPONSE
(
7
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
4
)
<
1
>
ubAVS_RESPONSE
(
14
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
5
)
<
1
>
ubAVS_RESPONSE
(
14
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
6
)
<
1
>
ubAVS_RESPONSE
(
15
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
7
)
<
1
>
ubAVS_RESPONSE
(
15
,
8
+
1
)
<
16
;4,2>
//
Move
second
8
x8
word
s
of
B
to
dest
GRF
mov
(
8
)
DEST_B
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
4
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
4
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
5
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
5
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
12
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
12
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
13
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_B
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
13
,
8
+
1
)
<
16
;4,2>
//
Move
second
8
x8
word
s
of
G
to
dest
GRF
mov
(
8
)
DEST_G
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
2
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
2
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
3
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
3
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
10
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
10
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
11
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_G
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
11
,
8
+
1
)
<
16
;4,2>
//
Move
second
8
x8
word
s
of
R
to
dest
GRF
mov
(
8
)
DEST_R
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
0
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
0
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
1
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
1
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
8
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
8
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
9
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_R
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
9
,
8
+
1
)
<
16
;4,2>
//
Move
second
8
x8
word
s
of
A
to
dest
GRF
mov
(
8
)
DEST_A
(
0
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
6
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
1
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
6
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
2
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
7
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
3
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
7
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
4
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
14
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
5
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
14
,
8
+
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
6
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
15
,
1
)
<
16
;4,2>
mov
(
8
)
DEST_A
(
7
,
8
)
<
1
>
ubAVS_RESPONSE_2
(
15
,
8
+
1
)
<
16
;4,2>
#
endif
//------------------------------------------------------------------------------
//
Set
to
write
bottom
region
to
memory
#
define
SRC_REGION
REGION_1
//
Re
-
define
new
#
of
lines
#
undef
nUV_NUM_OF_ROWS
#
undef
nY_NUM_OF_ROWS
#
define
nY_NUM_OF_ROWS
8
#
define
nUV_NUM_OF_ROWS
8
i965_drv_video/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
0 → 100644
View file @
20975a94
/*
*
All
Video
Processing
kernels
*
Copyright
©
<
2010
>
,
Intel
Corporation.
*
*
This
program
is
licensed
under
the
terms
and
conditions
of
the
*
Eclipse
Public
License
(
EPL
),
version
1.0
.
The
full
text
of
the
EPL
is
at
*
http:
//
www.opensource.org
/
licenses
/
eclipse
-
1.0
.php.
*
*/
//---------- RGB_Scaling.asm ----------
//
// Samples nY_NUM_OF_ROWS rows of 16 pixels each through the sampler and
// stores the four returned channels in DEST_R / DEST_G / DEST_B / DEST_A.
// Register names and the message descriptor come from Scaling.inc.

#include "Scaling.inc"

// Build a 16-element float32 ramp (0.0 .. 15.0) in SAMPLER_RAMP.
// Elements 0..7 are loaded from two packed :vf immediates, elements 8..15
// are the first eight plus 8.0.
//  mov (8)     SAMPLER_RAMP(0)<1>      0x76543210:v
//  add (8)     SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f
    mov (4)     SAMPLER_RAMP(0)<1>      0x48403000:vf       // 3, 2, 1, 0 in float vector
    mov (4)     SAMPLER_RAMP(0,4)<1>    0x5C585450:vf       // 7, 6, 5, 4 in float vector
    add (8)     SAMPLER_RAMP(1)<1>      SAMPLER_RAMP(0)     8.0:f

// Module: PrepareScaleCoord.asm

// Clear the first three dwords of the sampler message header source.
    mov (2)     rMSGSRC.0<1>:ud         0:ud                { NoDDClr }     // Unused fields
    mov (1)     rMSGSRC.2<1>:ud         0:ud                { NoDDChk }     // Write and offset

// 16 v coordinates: every lane starts at the vertical origin.  One copy goes
// straight into the message payload, the other is kept in SCALE_COORD_Y so it
// can be stepped once per row inside the loop.
    mov (16)    mfMSGPAYLOAD(2)<1>      fSRC_VID_V_ORI<0;1,0>:f
    mov (16)    SCALE_COORD_Y<1>:f      fSRC_VID_V_ORI<0;1,0>:f

// 16 u coordinates: u[i] = fSRC_VID_H_ORI + fVIDEO_STEP_X * ramp[i].
// acc0 is preloaded with the origin and mac adds the scaled ramp to it.
//  line (16)   mfMSGPAYLOAD(0)<1>      SCALE_STEP_X<0;1,0>:f   SAMPLER_RAMP(0)     // Assign to mrf directly
    mov (16)    acc0:f                  fSRC_VID_H_ORI<0;1,0>:f                     { Compr }
    mac (16)    mfMSGPAYLOAD(0)<1>      fVIDEO_STEP_X<0;1,0>:f  SAMPLER_RAMP(0)     { Compr }

// Constants consumed by the 'line' instructions in the loop (255.0 and 0.5).
// NOTE(review): Scaling.inc places both in r43, which is also the second GRF
// of SAMPLER_RAMP; they are written only after the last read of the ramp above.
    mov (1)     SCALE_LINE_P255<1>:f    255.0:f             { NoDDClr }     // { NoDDClr, NoDDChk }
    mov (1)     SCALE_LINE_P0_5<1>:f    0.5:f               { NoDDChk }

//------------------------------------------------------------------------------

$for (0; <nY_NUM_OF_ROWS; 1) {

    // Sample 16 pixels; the response fills 8 GRFs with float32 data, two GRFs
    // per channel.
    // NOTE(review): the original comment gave the channel order as BGRA (VYUA),
    // but GRF pairs 0/2/4/6 are copied to DEST_R/G/B/A below - confirm the order.
    mov (8)     MSGHDR_SCALE.0:ud       rMSGSRC.0<8;8,1>:ud     // Copy msg header and payload mirrors to MRFs
    send (16)   SCALE_RESPONSE_YW(0)<1> MSGHDR_SCALE    udDUMMY_NULL    nSMPL_ENGINE    SMPLR_MSG_DSC+nSI_SRC_SIMD16_RGB+nBI_CURRENT_SRC_RGB

    // Step v by fVIDEO_STEP_Y for the next row.
    add (16)    mfMSGPAYLOAD(2)<1>      SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f  // next-row v, written to the message payload
    add (16)    SCALE_COORD_Y<1>:f      SCALE_COORD_Y<8;8,1>:f  fVIDEO_STEP_Y<0;1,0>:f  // same value kept as the running v coordinate

    // Scale back to [0, 255] and convert float -> ud, one GRF pair at a time.
    // Scaling.inc documents the 'line' use as: multiply by 255, add 0.5.
    line (16)   acc0:f                  SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(0)    { Compr }   // GRF pair 0 (copied to DEST_R)
    mov (16)    SCALE_RESPONSE_YD(0)<1> acc0:f                                              { Compr }

    line (16)   acc0:f                  SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(2)    { Compr }   // GRF pair 2 (copied to DEST_G)
    mov (16)    SCALE_RESPONSE_YD(2)<1> acc0:f                                              { Compr }

    line (16)   acc0:f                  SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(4)    { Compr }   // GRF pair 4 (copied to DEST_B)
    mov (16)    SCALE_RESPONSE_YD(4)<1> acc0:f                                              { Compr }

//#if defined(SAVE_ARGB)
    // Only needed if Alpha value is written to the destination
    line (16)   acc0:f                  SCALE_LINE_P255<0;1,0>:f    SCALE_RESPONSE_YF(6)    { Compr }   // GRF pair 6 (copied to DEST_A)
    mov (16)    SCALE_RESPONSE_YD(6)<1> acc0:f                                              { Compr }
//#endif

    // Copy the converted values into row %1 of each destination plane.  The
    // source is the byte view of the response (SCALE_RESPONSE_YB).
    mov (16)    DEST_R(%1)<1>           SCALE_RESPONSE_YB(0)    // possible error due to truncation - vK
    mov (16)    DEST_G(%1)<1>           SCALE_RESPONSE_YB(2)    // possible error due to truncation - vK
    mov (16)    DEST_B(%1)<1>           SCALE_RESPONSE_YB(4)    // possible error due to truncation - vK
    mov (16)    DEST_A(%1)<1>           SCALE_RESPONSE_YB(6)    // possible error due to truncation - vK

}
i965_drv_video/shaders/post_processing/Core_Kernels/Scaling.inc
0 → 100644
View file @
20975a94
/*
* All Video Processing kernels
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
// File name: Scaling.inc
//
// Register aliases, response-buffer views and the sampler message descriptor
// used by RGB_Scaling.asm.

#ifndef _SCALING_INC_
#define _SCALING_INC_

// Local variables----------------------------------------------------------------------------------
#define MSGHDR_SCALE m1 // Message Payload Header (Uses m2, m3, m4, m5 implicitly)

//--------------------------------------------------------------------------------------------------
// Destination planes.

//r10.0 thru r33.0; Primary surface read from sampler (16x8)
#define DEST_Y uwTOP_Y
#define DEST_U uwTOP_U
#define DEST_V uwTOP_V

//r10.0 thru r41.0
// Four word-typed planes whose bases are 8 registers apart.
.declare DEST_B     Base=REG(r,10)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw
.declare DEST_G     Base=REG(r,18)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw
.declare DEST_R     Base=REG(r,26)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw
.declare DEST_A     Base=REG(r,34)  ElementSize=2   SrcRegion=REGION(8,1)   DstRegion=<1>   Type=uw

//r56.0 thru r79.0
// Sampler response buffers.  The same registers (nBOT_Y / nBOT_U / nBOT_V)
// are declared four times so they can be addressed as float, word, dword or
// byte data.

// float32 view
.declare SCALE_RESPONSE_YF  Base=REG(r,nBOT_Y)  ElementSize=4   SrcRegion=REGION(8,1)   Type=f
.declare SCALE_RESPONSE_UF  Base=REG(r,nBOT_U)  ElementSize=4   SrcRegion=REGION(8,1)   Type=f
.declare SCALE_RESPONSE_VF  Base=REG(r,nBOT_V)  ElementSize=4   SrcRegion=REGION(8,1)   Type=f

// unsigned word view
.declare SCALE_RESPONSE_YW  Base=REG(r,nBOT_Y)  ElementSize=2   SrcRegion=REGION(16,1)  Type=uw
.declare SCALE_RESPONSE_UW  Base=REG(r,nBOT_U)  ElementSize=2   SrcRegion=REGION(16,1)  Type=uw
.declare SCALE_RESPONSE_VW  Base=REG(r,nBOT_V)  ElementSize=2   SrcRegion=REGION(16,1)  Type=uw

// unsigned dword view
.declare SCALE_RESPONSE_YD  Base=REG(r,nBOT_Y)  ElementSize=4   SrcRegion=REGION(8,1)   Type=ud
.declare SCALE_RESPONSE_UD  Base=REG(r,nBOT_U)  ElementSize=4   SrcRegion=REGION(8,1)   Type=ud
.declare SCALE_RESPONSE_VD  Base=REG(r,nBOT_V)  ElementSize=4   SrcRegion=REGION(8,1)   Type=ud

// unsigned byte view; the REGION(8,4) source stride picks one byte per dword
.declare SCALE_RESPONSE_YB  Base=REG(r,nBOT_Y)  ElementSize=1   SrcRegion=REGION(8,4)   Type=ub
.declare SCALE_RESPONSE_UB  Base=REG(r,nBOT_U)  ElementSize=1   SrcRegion=REGION(8,4)   Type=ub
.declare SCALE_RESPONSE_VB  Base=REG(r,nBOT_V)  ElementSize=1   SrcRegion=REGION(8,4)   Type=ub

// Coordinate ramp, based at r42.
.declare SAMPLER_RAMP       Base=REG(r,42)      ElementSize=4   SrcRegion=<8;8,1>       Type=f  // 2 GRFs, 16 elements

// NOTE(review): the two SCALE_LINE_* constants below are placed in r43, which
// per the "2 GRFs" note above is the second register of SAMPLER_RAMP.
// RGB_Scaling.asm writes them only after its last read of the ramp; keep that
// ordering if either side changes (confirm against the REG2 macro definition).
//#define SCALE_STEP_X REG2(r,43,0)
//#define SCALE_COORD_X REG2(r,43,3)
#define SCALE_LINE_P255 REG2(r,43,4) // = 255.0 Used in 'line' inst to multiply 255, add 0.5, and round to int.
#define SCALE_LINE_P0_5 REG2(r,43,7) // = 0.5

//r44.0 thru r45.0
#define SCALE_COORD_Y REG(r,44) //2GRF

// Send Message [DevILK] Message Descriptor
// MBZ MsgL=5 MsgR=8 H MBZ SIMD MsgType SmplrIndx BindTab
// 000 0 101 0 1000 1 0 10 0000 0000 00000000
// 0 A 8 A 0 0 0 0
// MsgL=1+2*2(u,v)=5 MsgR=8
#define SMPLR_MSG_DSC 0x0A8A0000 // ILK Sampler Message Descriptor

// Re-define new number of lines
#undef nY_NUM_OF_ROWS
#undef nUV_NUM_OF_ROWS
#define nY_NUM_OF_ROWS 8
#define nUV_NUM_OF_ROWS 8

#endif //_SCALING_INC_
i965_drv_video/shaders/post_processing/Makefile.am
0 → 100644
View file @
20975a94
# Shader sources and pre-assembled binaries for the post-processing kernels.
#   INTEL_G4I       - include files the .g4a sources depend on (none yet)
#   INTEL_G4A       - assembler sources (run through m4 first)
#   INTEL_G4B       - binaries assembled with the default intel-gen4asm target
#   INTEL_G4B_GEN5  - binaries assembled with "-g 5"
INTEL_G4I =
INTEL_G4A = null.g4a
INTEL_G4B = null.g4b
INTEL_G4B_GEN5 = null.g4b.gen5

EXTRA_DIST = $(INTEL_G4I) \
	$(INTEL_G4A) \
	$(INTEL_G4B) \
	$(INTEL_G4B_GEN5)

if HAVE_GEN4ASM

SUFFIXES = .g4a .g4b

# Assemble foo.g4a into foo.g4b and foo.g4b.gen5 via the m4-expanded
# intermediate foo.g4m.
#  - The .gen5 side output is produced first and the real target ($@) last,
#    so a failure in either assembler run does not leave behind a fresh
#    target that make would consider up to date.
#  - The intermediate .g4m file is removed on failure as well as on success,
#    and the recipe still exits with the status of the failed step.
.g4a.g4b:
	m4 $*.g4a > $*.g4m && \
	intel-gen4asm -g 5 -o $@.gen5 $*.g4m && \
	intel-gen4asm -o $@ $*.g4m; \
	status=$$?; \
	rm -f $*.g4m; \
	exit $$status

$(INTEL_G4B): $(INTEL_G4I)

BUILT_SOURCES = $(INTEL_G4B)

clean-local:
	-rm -f $(INTEL_G4B)
	-rm -f $(INTEL_G4B_GEN5)

endif
i965_drv_video/shaders/post_processing/null.g4a
0 → 100644
View file @
20975a94
/* Just for test */
/* Placeholder kernel made of one instruction: a send to the thread spawner */
/* with a message length of 1, no response registers (rlen 0) and the EOT option set. */
/* This file is piped through m4 before assembly, so keep m4 quote characters out of comments. */
send(16) 0 acc0<1>UW g0<8,8,1>UW thread_spawner(0, 0, 0) mlen 1 rlen 0 {align1 EOT};
i965_drv_video/shaders/post_processing/null.g4b
0 → 100644
View file @
20975a94
{ 0x00800031, 0x24001d28, 0x008d0000, 0x87100000 },
i965_drv_video/shaders/post_processing/null.g4b.gen5
0 → 100644
View file @
20975a94
{ 0x00800031, 0x24001d28, 0x748d0000, 0x82000000 },
i965_drv_video/shaders/post_processing/nv12_avs_nv12.asm
0 → 100644
View file @
20975a94
// Module name: NV12_AVS_NV12
//
// Top-level kernel file: it contains no instructions of its own and only
// stitches together the shared components below, in this order.
.kernel NV12_AVS_NV12
.code

// Defined before SetupVPKernel.asm is included.
// NOTE(review): presumably selects the scaling-specific setup there - confirm.
#define INC_SCALING

#include "SetupVPKernel.asm"

#include "Multiple_Loop_Head.asm"
#include "PL2_AVS_IEF_16x8.asm"
#include "PL8x4_Save_NV12.asm"
#include "Multiple_Loop.asm"

END_THREAD      // End of Thread

.end_code

.end_kernel

// end of nv12_avs_nv12.asm
i965_drv_video/shaders/post_processing/nv12_avs_nv12.g4b.gen5
0 → 100644
View file @
20975a94
{ 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
{ 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
{ 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
{ 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
{ 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
{ 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
{ 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
{ 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
{ 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
{ 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
{ 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
{ 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
{ 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
{ 0x00000031, 0x25401c09, 0x208d0000, 0x044bb401 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
{ 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
{ 0x02000031, 0x25c01c09, 0x208d0000, 0x048bb802 },
{ 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
{ 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
{ 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
{ 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
{ 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
{ 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
{ 0x00000031, 0x27401c09, 0x208d0000, 0x044bb401 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
{ 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
{ 0x02000031, 0x27c01c09, 0x208d0000, 0x048bb802 },
{ 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
{ 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
{ 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
{ 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
{ 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
{ 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
{ 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
{ 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
{ 0x00600001, 0x22400229, 0x00aa05c1, 0x00000000 },
{ 0x00600001, 0x22600229, 0x00aa05c9, 0x00000000 },
{ 0x00600001, 0x22800229, 0x00aa05e1, 0x00000000 },
{ 0x00600001, 0x22a00229, 0x00aa05e9, 0x00000000 },
{ 0x00600001, 0x22c00229, 0x00aa0641, 0x00000000 },
{ 0x00600001, 0x22e00229, 0x00aa0649, 0x00000000 },
{ 0x00600001, 0x23000229, 0x00aa0661, 0x00000000 },
{ 0x00600001, 0x23200229, 0x00aa0669, 0x00000000 },
{ 0x00600001, 0x23400229, 0x00aa0601, 0x00000000 },
{ 0x00600001, 0x23600229, 0x00aa0609, 0x00000000 },
{ 0x00600001, 0x23800229, 0x00aa0621, 0x00000000 },
{ 0x00600001, 0x23a00229, 0x00aa0629, 0x00000000 },
{ 0x00600001, 0x23c00229, 0x00aa0681, 0x00000000 },
{ 0x00600001, 0x23e00229, 0x00aa0689, 0x00000000 },
{ 0x00600001, 0x24000229, 0x00aa06a1, 0x00000000 },
{ 0x00600001, 0x24200229, 0x00aa06a9, 0x00000000 },
{ 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
{ 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
{ 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
{ 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
{ 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
{ 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
{ 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
{ 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
{ 0x00600001, 0x22500229, 0x00aa07c1, 0x00000000 },
{ 0x00600001, 0x22700229, 0x00aa07c9, 0x00000000 },
{ 0x00600001, 0x22900229, 0x00aa07e1, 0x00000000 },
{ 0x00600001, 0x22b00229, 0x00aa07e9, 0x00000000 },
{ 0x00600001, 0x22d00229, 0x00aa0841, 0x00000000 },
{ 0x00600001, 0x22f00229, 0x00aa0849, 0x00000000 },
{ 0x00600001, 0x23100229, 0x00aa0861, 0x00000000 },
{ 0x00600001, 0x23300229, 0x00aa0869, 0x00000000 },
{ 0x00600001, 0x23500229, 0x00aa0801, 0x00000000 },
{ 0x00600001, 0x23700229, 0x00aa0809, 0x00000000 },
{ 0x00600001, 0x23900229, 0x00aa0821, 0x00000000 },
{ 0x00600001, 0x23b00229, 0x00aa0829, 0x00000000 },
{ 0x00600001, 0x23d00229, 0x00aa0881, 0x00000000 },
{ 0x00600001, 0x23f00229, 0x00aa0889, 0x00000000 },
{ 0x00600001, 0x24100229, 0x00aa08a1, 0x00000000 },
{ 0x00600001, 0x24300229, 0x00aa08a9, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
{ 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
{ 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
{ 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
{ 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
{ 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
{ 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
{ 0x00610001, 0x24400129, 0x028d00b8, 0x00000000 },
{ 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
{ 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
{ 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
{ 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
{ 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
{ 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
{ 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
{ 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
{ 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
{ 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
{ 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
{ 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
{ 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
{ 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
{ 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
{ 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
{ 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
{ 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
{ 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
{ 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
{ 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
{ 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
{ 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
{ 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
{ 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
{ 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
{ 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
{ 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
{ 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
{ 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
{ 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
{ 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
{ 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
{ 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
{ 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
{ 0x05000010, 0x2000358c, 0x02210400, 0x00000084 },
{ 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
{ 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
{ 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
{ 0x00010220, 0x34001c00, 0x02001400, 0xfffffede },
{ 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
{ 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
{ 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
{ 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
{ 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
{ 0x00000220, 0x34001c00, 0x00001400, 0xfffffed2 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
i965_drv_video/shaders/post_processing/nv12_dndi_nv12.asm
0 → 100644
View file @
20975a94
// Module name: NV12_DNDI_NV12
//
// Top-level kernel file: it contains no instructions of its own and only
// stitches together the shared components below, in this order.
.kernel NV12_DNDI_NV12
.code

// Defined before SetupVPKernel.asm is included.
// NOTE(review): presumably selects the DNDI-specific setup there - confirm.
#define INC_DNDI

#include "SetupVPKernel.asm"

#include "Multiple_Loop_Head.asm"
#include "PL_DNDI_ALG_UVCopy_NV12.asm"
#include "Multiple_Loop.asm"

END_THREAD      // End of Thread

.end_code

.end_kernel

// end of nv12_dndi_nv12.asm
i965_drv_video/shaders/post_processing/nv12_dndi_nv12.g4b.gen5
0 → 100644
View file @
20975a94
{ 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
{ 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
{ 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
{ 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
{ 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
{ 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
{ 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
{ 0x01600031, 0x24400c01, 0x208d0000, 0x04cb8004 },
{ 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
{ 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
{ 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
{ 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
{ 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
{ 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
{ 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
{ 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
{ 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
{ 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
{ 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
{ 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
{ 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
{ 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
{ 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
{ 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
{ 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
{ 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
{ 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
{ 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
{ 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
{ 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
{ 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
{ 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
{ 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
{ 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
{ 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
{ 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
{ 0x0b600031, 0x20000c04, 0x508d0000, 0x04082014 },
{ 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
{ 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
{ 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
{ 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
{ 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
{ 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
{ 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
{ 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
{ 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
{ 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
{ 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
{ 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
{ 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
{ 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
{ 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
{ 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
{ 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
{ 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 },
{ 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01600031, 0x28000c01, 0x408d0000, 0x0218a002 },
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
{ 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
{ 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
{ 0x05000010, 0x2000358c, 0x02210400, 0x00000084 },
{ 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
{ 0x00010220, 0x34001c00, 0x02001400, 0xffffff70 },
{ 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
{ 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
{ 0x00000220, 0x34001c00, 0x00001400, 0xffffff6a },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
i965_drv_video/shaders/post_processing/nv12_load_save_nv12.asm
0 → 100644
View file @
20975a94
// Module name : NV12_LOAD_SAVE_NV12
//
// Top-level kernel file: it contains no instructions of its own. The kernel
// body is assembled entirely from the shared fragments included below, in the
// order listed, followed by the END_THREAD macro.
// NOTE(review): the role of each fragment is inferred from its file name only
// (setup, loop head, NV12 8x4 load, NV12 save, loop tail) - confirm against
// the fragment sources before relying on it.
.kernel NV12_LOAD_SAVE_NV12
.code

#include "SetupVPKernel.asm"
#include "Multiple_Loop_Head.asm"
#include "NV12_Load_8x4.asm"
#include "PL8x4_Save_NV12.asm"
#include "Multiple_Loop.asm"

END_THREAD      // End of Thread

.end_code
.end_kernel

// end of nv12_load_save_nv12.asm
i965_drv_video/shaders/post_processing/nv12_load_save_nv12.g4b.gen5
0 → 100644
View file @
20975a94
{ 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
{ 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
{ 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
{ 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
{ 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
{ 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
{ 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
{ 0x02600031, 0x28000c01, 0x408d0000, 0x0228a002 },
{ 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
{ 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
{ 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
{ 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
{ 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
{ 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
{ 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
{ 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
{ 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
{ 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
{ 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
{ 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
{ 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
{ 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
{ 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
{ 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
{ 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
{ 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
{ 0x00610001, 0x24400129, 0x028d00b8, 0x00000000 },
{ 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
{ 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
{ 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
{ 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
{ 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
{ 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
{ 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
{ 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
{ 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
{ 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
{ 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
{ 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
{ 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
{ 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
{ 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
{ 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
{ 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
{ 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
{ 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
{ 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
{ 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
{ 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
{ 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
{ 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
{ 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
{ 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
{ 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
{ 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
{ 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
{ 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
{ 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
{ 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
{ 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
{ 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
{ 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
{ 0x05000010, 0x2000358c, 0x02210400, 0x00000084 },
{ 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
{ 0x00010220, 0x34001c00, 0x02001400, 0xffffff48 },
{ 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
{ 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
{ 0x00000220, 0x34001c00, 0x00001400, 0xffffff42 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
i965_drv_video/shaders/post_processing/nv12_scaling_nv12.asm
0 → 100644
View file @
20975a94
// Module name : NV12_SCALING_NV12
//
// Top-level kernel file: it contains no instructions of its own. The kernel
// body is assembled entirely from the shared fragments included below, in the
// order listed, followed by the END_THREAD macro.
// NOTE(review): the role of each fragment is inferred from its file name only
// (setup, loop head, PL2 scaling, 16x8 -> 8x4 conversion, NV12 save, loop
// tail) - confirm against the fragment sources before relying on it.
.kernel NV12_SCALING_NV12
.code

// Must stay ahead of the includes so that every fragment sees the symbol.
// NOTE(review): presumably selects scaling-specific paths inside the shared
// fragments - confirm which fragments actually test INC_SCALING.
#define INC_SCALING

#include "SetupVPKernel.asm"
#include "Multiple_Loop_Head.asm"
#include "PL2_Scaling.asm"
#include "PL16x8_PL8x4.asm"
#include "PL8x4_Save_NV12.asm"
#include "Multiple_Loop.asm"

END_THREAD      // End of Thread

.end_code
.end_kernel

// end of nv12_scaling_nv12.asm
i965_drv_video/shaders/post_processing/nv12_scaling_nv12.g4b.gen5
0 → 100644
View file @
20975a94
{ 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
{ 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
{ 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
{ 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
{ 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
{ 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
{ 0x00400001, 0x254002fd, 0x00000000, 0x48403000 },
{ 0x00400001, 0x255002fd, 0x00000000, 0x5c585450 },
{ 0x00600040, 0x25607fbd, 0x008d0540, 0x41000000 },
{ 0x00200401, 0x21000061, 0x00000000, 0x00000000 },
{ 0x00000801, 0x21080061, 0x00000000, 0x00000000 },
{ 0x00802001, 0x208003be, 0x000000a8, 0x00000000 },
{ 0x00802001, 0x258003bd, 0x000000a8, 0x00000000 },
{ 0x00802001, 0x240003bc, 0x000000a4, 0x00000000 },
{ 0x00802048, 0x204077be, 0x000000bc, 0x008d0540 },
{ 0x00000401, 0x257003fd, 0x00000000, 0x437f0000 },
{ 0x00000801, 0x257c03fd, 0x00000000, 0x3f000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x21400229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x22400229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x23400229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x21600229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x22600229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x23600229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x21800229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x22800229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x23800229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x21a00229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x22a00229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x23a00229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x21c00229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x22c00229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x23c00229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x21e00229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x22e00229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x23e00229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x22000229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x23000229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x24000229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01800031, 0x27001c09, 0x208d0000, 0x0a8a0101 },
{ 0x01800031, 0x28001c09, 0x208d0000, 0x0a8a0202 },
{ 0x00802040, 0x208077be, 0x008d0580, 0x00000038 },
{ 0x00802040, 0x258077bd, 0x008d0580, 0x00000038 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0700 },
{ 0x00802001, 0x27000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0800 },
{ 0x00802001, 0x28000381, 0x00b10400, 0x00000000 },
{ 0x00802059, 0x240077bc, 0x00000570, 0x008d0840 },
{ 0x00802001, 0x28400381, 0x00b10400, 0x00000000 },
{ 0x00800001, 0x22200229, 0x00cf0700, 0x00000000 },
{ 0x00800001, 0x23200229, 0x00cf0800, 0x00000000 },
{ 0x00800001, 0x24200229, 0x00cf0840, 0x00000000 },
{ 0x00600001, 0x22400129, 0x00ae0240, 0x00000000 },
{ 0x00600001, 0x23400129, 0x00ae0340, 0x00000000 },
{ 0x00600001, 0x22500129, 0x00ae0280, 0x00000000 },
{ 0x00600001, 0x23500129, 0x00ae0380, 0x00000000 },
{ 0x00600001, 0x22600129, 0x00ae02c0, 0x00000000 },
{ 0x00600001, 0x23600129, 0x00ae03c0, 0x00000000 },
{ 0x00600001, 0x22700129, 0x00ae0300, 0x00000000 },
{ 0x00600001, 0x23700129, 0x00ae0400, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
{ 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
{ 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
{ 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
{ 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
{ 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
{ 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
{ 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
{ 0x00610001, 0x24400129, 0x028d00b8, 0x00000000 },
{ 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
{ 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
{ 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
{ 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
{ 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
{ 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
{ 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
{ 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
{ 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
{ 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
{ 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
{ 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
{ 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
{ 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
{ 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
{ 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
{ 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
{ 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
{ 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
{ 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
{ 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
{ 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
{ 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
{ 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
{ 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
{ 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
{ 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
{ 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
{ 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
{ 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
{ 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
{ 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
{ 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
{ 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
{ 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
{ 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
{ 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
{ 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
{ 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
{ 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
{ 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
{ 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
{ 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
{ 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
{ 0x05000010, 0x2000358c, 0x02210400, 0x00000084 },
{ 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
{ 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
{ 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
{ 0x00010220, 0x34001c00, 0x02001400, 0xfffffe66 },
{ 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
{ 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
{ 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
{ 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
{ 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
{ 0x00000220, 0x34001c00, 0x00001400, 0xfffffe5a },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
{ 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
{ 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment