diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2015-02-07 18:49:38 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2015-02-16 21:53:51 +0100 |
commit | 89cb4995fa0cc9375d9bb246512a82037ffeedc1 (patch) | |
tree | 057f8ce303c11a32122a2ee66c5959ec5ada465e /libavcodec | |
parent | 22596383f3b8c7cd8a06570957face3875a73d3b (diff) | |
download | ffmpeg-89cb4995fa0cc9375d9bb246512a82037ffeedc1.tar.gz |
x86: hevc_mc: save 1 gpr in epel filter loading
The 3*stride value stored in r3src can be loaded much later,
so use r3src instead of a dedicated gpr when possible.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/hevc_mc.asm | 71 |
1 files changed, 35 insertions, 36 deletions
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 9a7367af58..6ef8a60b86 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -136,20 +136,22 @@ QPEL_TABLE 10, 8, w, avx2
 
 %endmacro
 
-%macro EPEL_FILTER 2-4 ; bit depth, filter index
+%macro EPEL_FILTER 5 ; bit depth, filter index, xmma, xmmb, gprtmp
 %if cpuflag(avx2)
     %assign %%offset 32
 %ifdef PIC
-    lea rfilterq, [hevc_epel_filters_avx2_%1]
+    lea %5q, [hevc_epel_filters_avx2_%1]
+    %define FILTER %5q
 %else
-    %define rfilterq hevc_epel_filters_avx2_%1
+    %define FILTER hevc_epel_filters_avx2_%1
 %endif
 %else
     %assign %%offset 16
 %ifdef PIC
-    lea rfilterq, [hevc_epel_filters_sse4_%1]
+    lea %5q, [hevc_epel_filters_sse4_%1]
+    %define FILTER %5q
 %else
-    %define rfilterq hevc_epel_filters_sse4_%1
+    %define FILTER hevc_epel_filters_sse4_%1
 %endif
 %endif ;cpuflag(avx2)
     sub %2q, 1
@@ -158,13 +160,8 @@ QPEL_TABLE 10, 8, w, avx2
 %else
     shl %2q, 5 ; multiply by 32
 %endif
-%if %0 == 2
-    mova m14, [rfilterq + %2q] ; get 2 first values of filters
-    mova m15, [rfilterq + %2q+%%offset] ; get 2 last values of filters
-%else
-    mova %3, [rfilterq + %2q] ; get 2 first values of filters
-    mova %4, [rfilterq + %2q+%%offset] ; get 2 last values of filters
-%endif
+    mova %3, [FILTER + %2q] ; get 2 first values of filters
+    mova %4, [FILTER + %2q+%%offset] ; get 2 last values of filters
 %endmacro
 
 %macro EPEL_HV_FILTER 1
@@ -179,17 +176,17 @@ QPEL_TABLE 10, 8, w, avx2
 %endif
 
 %ifdef PIC
-    lea rfilterq, [%%table]
+    lea r3srcq, [%%table]
+    %define FILTER r3srcq
 %else
-    %define rfilterq %%table
+    %define FILTER %%table
 %endif
     sub mxq, 1
     sub myq, 1
     shl mxq, %%shift ; multiply by 32
     shl myq, %%shift ; multiply by 32
-    mova m14, [rfilterq + mxq] ; get 2 first values of filters
-    mova m15, [rfilterq + mxq+%%offset] ; get 2 last values of filters
-    lea r3srcq, [srcstrideq*3]
+    mova m14, [FILTER + mxq] ; get 2 first values of filters
+    mova m15, [FILTER + mxq+%%offset] ; get 2 last values of filters
 
 %if cpuflag(avx2)
 %define %%table hevc_epel_filters_avx2_10
@@ -197,12 +194,14 @@ QPEL_TABLE 10, 8, w, avx2
 %define %%table hevc_epel_filters_sse4_10
 %endif
 %ifdef PIC
-    lea rfilterq, [%%table]
+    lea r3srcq, [%%table]
+    %define FILTER r3srcq
 %else
-    %define rfilterq %%table
+    %define FILTER %%table
 %endif
-    mova m12, [rfilterq + myq] ; get 2 first values of filters
-    mova m13, [rfilterq + myq+%%offset] ; get 2 last values of filters
+    mova m12, [FILTER + myq] ; get 2 first values of filters
+    mova m13, [FILTER + myq+%%offset] ; get 2 last values of filters
+    lea r3srcq, [srcstrideq*3]
 %endmacro
 
 %macro QPEL_FILTER 2
@@ -733,7 +732,7 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid
 %macro HEVC_PUT_HEVC_EPEL 2
 cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
-    EPEL_FILTER %2, mx, m4, m5
+    EPEL_FILTER %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -744,7 +743,7 @@ cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rf
 cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
     movdqa m6, [pw_%2]
-    EPEL_FILTER %2, mx, m4, m5
+    EPEL_FILTER %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m4, m5
@@ -758,7 +757,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride,
 
 cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride, src2, height, mx, rfilter
     movdqa m6, [pw_bi_%2]
-    EPEL_FILTER %2, mx, m4, m5
+    EPEL_FILTER %2, mx, m4, m5, rfilter
 .loop
     EPEL_LOAD %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -778,11 +777,11 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride,
 ; int height, int mx, int my, int width)
 ; ******************************
 
-cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_epel_v%1_%2, 4, 6, 11, dst, src, srcstride, height, r3src, my
     movifnidn myd, mym
-    lea r3srcq, [srcstrideq*3]
     sub srcq, srcstrideq
-    EPEL_FILTER %2, my, m4, m5
+    EPEL_FILTER %2, my, m4, m5, r3src
+    lea r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD %2, srcq, srcstride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -790,12 +789,12 @@ cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src,
     LOOP_END dst, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, 11, dst, dststride, src, srcstride, height, r3src, my
     movifnidn myd, mym
-    lea r3srcq, [srcstrideq*3]
     movdqa m6, [pw_%2]
     sub srcq, srcstrideq
-    EPEL_FILTER %2, my, m4, m5
+    EPEL_FILTER %2, my, m4, m5, r3src
+    lea r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD %2, srcq, srcstride, %1
     EPEL_COMPUTE %2, %1, m4, m5
@@ -808,12 +807,12 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride,
     RET
 
 
-cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride, src2, height, r3src, my, rfilter
+cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, 11, dst, dststride, src, srcstride, src2, height, r3src, my
     movifnidn myd, mym
-    lea r3srcq, [srcstrideq*3]
     movdqa m6, [pw_bi_%2]
     sub srcq, srcstrideq
-    EPEL_FILTER %2, my, m4, m5
+    EPEL_FILTER %2, my, m4, m5, r3src
+    lea r3srcq, [srcstrideq*3]
 .loop
     EPEL_LOAD %2, srcq, srcstride, %1
     EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -836,7 +835,7 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride,
 ; ******************************
 
 %macro HEVC_PUT_HEVC_EPEL_HV 2
-cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_epel_hv%1_%2, 6, 7, 16 , dst, src, srcstride, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub srcq, srcstrideq
     EPEL_HV_FILTER %2
@@ -902,7 +901,7 @@ cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx,
     LOOP_END dst, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub srcq, srcstrideq
     EPEL_HV_FILTER %2
@@ -966,7 +965,7 @@ cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstrid
     jnz .loop ; height loop
     RET
 
-cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src
 %assign %%stride ((%2 + 7)/8)
     sub srcq, srcstrideq
     EPEL_HV_FILTER %2