diff options
author | Rostislav Pehlivanov <rpehlivanov@ob-encoder.com> | 2016-06-23 18:06:56 +0100 |
---|---|---|
committer | Rostislav Pehlivanov <atomnuker@gmail.com> | 2016-07-11 23:30:11 +0100 |
commit | 80721cc1ff1f1c8c460c136184ed6416a73b4bfd (patch) | |
tree | ff11e24dd41ac6254ccd6a54e6c3ceb1d3781875 /libavcodec/x86 | |
parent | 244d22452c7d40891627e5f7e376558d6b5d8582 (diff) | |
download | ffmpeg-80721cc1ff1f1c8c460c136184ed6416a73b4bfd.tar.gz |
diracdsp: add dequantization SIMD
Currently unused, to be used in the following commits.
Signed-off-by: Rostislav Pehlivanov <rpehlivanov@obe.tv>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/diracdsp.asm | 37 | ||||
-rw-r--r-- | libavcodec/x86/diracdsp_init.c | 6 |
2 files changed, 43 insertions, 0 deletions
diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm
index a042413c3a..8e9f0fbf02 100644
--- a/libavcodec/x86/diracdsp.asm
+++ b/libavcodec/x86/diracdsp.asm
@@ -263,3 +263,40 @@ ADD_RECT sse2
 HPEL_FILTER sse2
 ADD_OBMC 32, sse2
 ADD_OBMC 16, sse2
+
+INIT_XMM sse4
+
+; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
+cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
+    movd   m2, qfd
+    movd   m3, qsd
+    SPLATD m2
+    SPLATD m3
+    mov    r4, tot_hq
+    mov    r3, dstq
+
+    .loop_v:
+    mov    tot_hq, r4
+    mov    dstq, r3
+
+    .loop_h:
+    movu   m0, [srcq]
+
+    pabsd  m1, m0
+    pmulld m1, m2
+    paddd  m1, m3
+    psrld  m1, 2
+    psignd m1, m0
+
+    movu   [dstq], m1
+
+    add    srcq, mmsize
+    add    dstq, mmsize
+    sub    tot_hd, 4
+    jg     .loop_h
+
+    add    r3, strideq
+    dec    tot_vd
+    jg     .loop_v
+
+    RET
diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c
index 5fae79891b..26b885d530 100644
--- a/libavcodec/x86/diracdsp_init.c
+++ b/libavcodec/x86/diracdsp_init.c
@@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src,
 void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 
+void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
+
 #if HAVE_YASM
 
 #define HPEL_FILTER(MMSIZE, EXT) \
@@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
         c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
         c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
     }
+
+    if (EXTERNAL_SSE4(mm_flags)) {
+        c->dequant_subband[1] = ff_dequant_subband_32_sse4;
+    }
 }