diff options
author | James Almer <jamrial@gmail.com> | 2017-03-31 20:42:16 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-03-31 20:49:45 -0300 |
commit | 6171f178e70ebe75e5964531f47ccc32455d5557 (patch) | |
tree | f27d489711d05638f3a511aab7a554ff53293f86 | |
parent | b62a87591ebe57923bc9aabf487dbce0a7af2627 (diff) | |
download | ffmpeg-6171f178e70ebe75e5964531f47ccc32455d5557.tar.gz |
x86/hevc_add_res: merge last remaining changes from 3d6535983282bea542dac2e568ae50da5796be34
See https://lists.libav.org/pipermail/libav-devel/2016-October/079829.html
-rw-r--r-- | doc/libav-merge.txt | 1 | ||||
-rw-r--r-- | libavcodec/x86/hevc_add_res.asm | 46 |
2 files changed, 15 insertions, 32 deletions
```diff
diff --git a/doc/libav-merge.txt b/doc/libav-merge.txt
index 0cbd9f4a90..30518c0a67 100644
--- a/doc/libav-merge.txt
+++ b/doc/libav-merge.txt
@@ -97,7 +97,6 @@ Stuff that didn't reach the codebase:
 - VAAPI VP8 decode hwaccel (currently under review: http://ffmpeg.org/pipermail/ffmpeg-devel/2017-February/thread.html#207348)
 - Removal of the custom atomic API (5cc0057f49, see http://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209003.html)
 - Use the new bitstream filter for extracting extradata (8e2ea69135 and 096a8effa3, see https://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209068.html)
-- ADD_RES_MMX_4_8 in libavcodec/x86/hevc_add_res.asm probably needs updating (see 589880710)
 - Read aac_adtstoasc extradata updates from packet side data on Matroska once mov and the bsf in question are fixed (See 13a211e632 and 5ef1959080)
 
 Collateral damage that needs work locally:
diff --git a/libavcodec/x86/hevc_add_res.asm b/libavcodec/x86/hevc_add_res.asm
index d97e4abddb..36d4d8e2e2 100644
--- a/libavcodec/x86/hevc_add_res.asm
+++ b/libavcodec/x86/hevc_add_res.asm
@@ -28,25 +28,23 @@ cextern pw_1023
 
 ; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project
 %macro ADD_RES_MMX_4_8 0
-    mova m2, [r1]
-    mova m4, [r1+8]
+    mova m0, [r1]
+    mova m2, [r1+8]
+    pxor m1, m1
     pxor m3, m3
+    psubw m1, m0
     psubw m3, m2
-    packuswb m2, m2
-    packuswb m3, m3
-    pxor m5, m5
-    psubw m5, m4
-    packuswb m4, m4
-    packuswb m5, m5
-
-    movh m0, [r0]
-    movh m1, [r0+r2]
+    packuswb m0, m2
+    packuswb m1, m3
+
+    movd m2, [r0]
+    movd m3, [r0+r2]
+    punpckldq m2, m3
     paddusb m0, m2
-    paddusb m1, m4
-    psubusb m0, m3
-    psubusb m1, m5
-    movh [r0], m0
-    movh [r0+r2], m1
+    psubusb m0, m1
+    movd [r0], m0
+    psrlq m0, 32
+    movd [r0+r2], m0
 %endmacro
 
 
@@ -95,15 +93,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
     vinserti128 m2, m2, [r1+%1+32], 1
     vinserti128 m6, m6, [r1+%1+48], 1
 %endif
-%if cpuflag(avx)
     psubw m1, m0, m2
     psubw m5, m0, m6
-%else
-    mova m1, m0
-    mova m5, m0
-    psubw m1, m2
-    psubw m5, m6
-%endif
     packuswb m2, m6
     packuswb m1, m5
 
@@ -113,15 +104,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
     vinserti128 m4, m4, [r1+%1+96 ], 1
     vinserti128 m6, m6, [r1+%1+112], 1
 %endif
-%if cpuflag(avx)
     psubw m3, m0, m4
     psubw m5, m0, m6
-%else
-    mova m3, m0
-    mova m5, m0
-    psubw m3, m4
-    psubw m5, m6
-%endif
     packuswb m4, m6
     packuswb m3, m5
 
@@ -192,7 +176,7 @@ cglobal hevc_add_residual_32_8, 3, 5, 7
     dec r4d
     jg .loop
     RET
-%endif
+%endif ;HAVE_AVX2_EXTERNAL
 
 %macro ADD_RES_SSE_8_10 4
     mova m0, [%4]
```