aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com> 2014-05-18 12:02:37 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2014-05-18 16:23:58 +0200
commit: f1793fe9cdf13b65d5a024b81126da9a26a5434f (patch)
tree: 64e2c5afd42265ea009c4a43c79fa23f4037f98b
parent: 0a150670476bc8c38df7add4c59826bff73c204e (diff)
download: ffmpeg-f1793fe9cdf13b65d5a024b81126da9a26a5434f.tar.gz
x86: hevc_mc: specify coefficients registers
By default, the EPEL_FILTER macro unconditionally loads the coefficients into m14/m15. This forces an unnecessarily high register count. Reduce that count by making the coefficient registers optional parameters of EPEL_FILTER. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/x86/hevc_mc.asm 59
1 file changed, 32 insertions, 27 deletions
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 1fae38c615..8a605d759f 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -112,7 +112,7 @@ QPEL_TABLE 10, 4, w, sse4
%endif
%endmacro
-%macro EPEL_FILTER 2 ; bit depth, filter index
+%macro EPEL_FILTER 2-4 ; bit depth, filter index
%ifdef PIC
lea rfilterq, [hevc_epel_filters_sse4_%1]
%else
@@ -120,8 +120,13 @@ QPEL_TABLE 10, 4, w, sse4
%endif
sub %2q, 1
shl %2q, 5 ; multiply by 32
+%if %0 == 2
movdqa m14, [rfilterq + %2q] ; get 2 first values of filters
movdqa m15, [rfilterq + %2q+16] ; get 2 last values of filters
+%else
+ movdqa %3, [rfilterq + %2q] ; get 2 first values of filters
+ movdqa %4, [rfilterq + %2q+16] ; get 2 last values of filters
+%endif
%endmacro
%macro EPEL_HV_FILTER 1
@@ -539,24 +544,24 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 7, 7, 6, dst, dststride, src, srcstrid
%macro HEVC_PUT_HEVC_EPEL 2
-cglobal hevc_put_hevc_epel_h%1_%2, 6, 7, 15 , dst, dststride, src, srcstride, height, mx, rfilter
+cglobal hevc_put_hevc_epel_h%1_%2, 6, 7, 6, dst, dststride, src, srcstride, height, mx, rfilter
%assign %%stride ((%2 + 7)/8)
- EPEL_FILTER %2, mx
+ EPEL_FILTER %2, mx, m4, m5
.loop
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
- EPEL_COMPUTE %2, %1, m14, m15
+ EPEL_COMPUTE %2, %1, m4, m5
PEL_10STORE%1 dstq, m0, m1
LOOP_END dst, dststride, src, srcstride
RET
-cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 15 , dst, dststride, src, srcstride, height, mx, rfilter
+cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 7, dst, dststride, src, srcstride, height, mx, rfilter
%assign %%stride ((%2 + 7)/8)
- movdqa m9, [pw_%2]
- EPEL_FILTER %2, mx
+ movdqa m6, [pw_%2]
+ EPEL_FILTER %2, mx, m4, m5
.loop
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
- EPEL_COMPUTE %2, %1, m14, m15
- UNI_COMPUTE %1, %2, m0, m1, m9
+ EPEL_COMPUTE %2, %1, m4, m5
+ UNI_COMPUTE %1, %2, m0, m1, m6
PEL_%2STORE%1 dstq, m0, m1
lea dstq, [dstq+dststrideq] ; dst += dststride
lea srcq, [srcq+srcstrideq] ; src += srcstride
@@ -564,14 +569,14 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 15 , dst, dststride, src, srcstride
jnz .loop ; height loop
RET
-cglobal hevc_put_hevc_bi_epel_h%1_%2, 8, 9, 15, dst, dststride, src, srcstride, src2, src2stride,height, mx, rfilter
- movdqa m9, [pw_bi_%2]
- EPEL_FILTER %2, mx
+cglobal hevc_put_hevc_bi_epel_h%1_%2, 8, 9, 7, dst, dststride, src, srcstride, src2, src2stride,height, mx, rfilter
+ movdqa m6, [pw_bi_%2]
+ EPEL_FILTER %2, mx, m4, m5
.loop
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
- EPEL_COMPUTE %2, %1, m14, m15
+ EPEL_COMPUTE %2, %1, m4, m5
SIMPLE_BILOAD %1, src2q, m2, m3
- BI_COMPUTE %1, %2, m0, m1, m2, m3, m9
+ BI_COMPUTE %1, %2, m0, m1, m2, m3, m6
PEL_%2STORE%1 dstq, m0, m1
lea dstq, [dstq+dststrideq] ; dst += dststride
lea srcq, [srcq+srcstrideq] ; src += srcstride
@@ -587,26 +592,26 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 8, 9, 15, dst, dststride, src, srcstride,
; int16_t* mcbuffer)
; ******************************
-cglobal hevc_put_hevc_epel_v%1_%2, 7, 8, 15 , dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_epel_v%1_%2, 7, 8, 6, dst, dststride, src, srcstride, height, r3src, my, rfilter
lea r3srcq, [srcstrideq*3]
sub srcq, srcstrideq
- EPEL_FILTER %2, my
+ EPEL_FILTER %2, my, m4, m5
.loop
EPEL_LOAD %2, srcq, srcstride, %1
- EPEL_COMPUTE %2, %1, m14, m15
+ EPEL_COMPUTE %2, %1, m4, m5
PEL_10STORE%1 dstq, m0, m1
LOOP_END dst, dststride, src, srcstride
RET
-cglobal hevc_put_hevc_uni_epel_v%1_%2, 7, 8, 15 , dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_uni_epel_v%1_%2, 7, 8, 7, dst, dststride, src, srcstride, height, r3src, my, rfilter
lea r3srcq, [srcstrideq*3]
- movdqa m9, [pw_%2]
+ movdqa m6, [pw_%2]
sub srcq, srcstrideq
- EPEL_FILTER %2, my
+ EPEL_FILTER %2, my, m4, m5
.loop
EPEL_LOAD %2, srcq, srcstride, %1
- EPEL_COMPUTE %2, %1, m14, m15
- UNI_COMPUTE %1, %2, m0, m1, m9
+ EPEL_COMPUTE %2, %1, m4, m5
+ UNI_COMPUTE %1, %2, m0, m1, m6
PEL_%2STORE%1 dstq, m0, m1
lea dstq, [dstq+dststrideq] ; dst += dststride
lea srcq, [srcq+srcstrideq] ; src += srcstride
@@ -615,16 +620,16 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 7, 8, 15 , dst, dststride, src, srcstride
RET
-cglobal hevc_put_hevc_bi_epel_v%1_%2, 9, 10, 15, dst, dststride, src, srcstride, src2, src2stride,height, r3src, my, rfilter
+cglobal hevc_put_hevc_bi_epel_v%1_%2, 9, 10, 7, dst, dststride, src, srcstride, src2, src2stride,height, r3src, my, rfilter
lea r3srcq, [srcstrideq*3]
- movdqa m9, [pw_bi_%2]
+ movdqa m6, [pw_bi_%2]
sub srcq, srcstrideq
- EPEL_FILTER %2, my
+ EPEL_FILTER %2, my, m4, m5
.loop
EPEL_LOAD %2, srcq, srcstride, %1
- EPEL_COMPUTE %2, %1, m14, m15
+ EPEL_COMPUTE %2, %1, m4, m5
SIMPLE_BILOAD %1, src2q, m2, m3
- BI_COMPUTE %1, %2, m0, m1, m2, m3, m9
+ BI_COMPUTE %1, %2, m0, m1, m2, m3, m6
PEL_%2STORE%1 dstq, m0, m1
lea dstq, [dstq+dststrideq] ; dst += dststride
lea srcq, [srcq+srcstrideq] ; src += srcstride