diff options
author | Martin Vignali <martin.vignali@gmail.com> | 2017-10-01 21:37:15 +0200 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-10-01 17:35:30 -0300 |
commit | ac5908b13f16cbda396730c35f5f3125ca24577a (patch) | |
tree | 01f49a554ed8199b7aaa5d095c859796c6c10c18 | |
parent | 59924d5eb11646f82f70c206be8a867468f102b9 (diff) | |
download | ffmpeg-ac5908b13f16cbda396730c35f5f3125ca24577a.tar.gz |
libavcodec/exr : add x86 SIMD for predictor
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/exr.c | 16 | ||||
-rw-r--r-- | libavcodec/exrdsp.c | 9 | ||||
-rw-r--r-- | libavcodec/exrdsp.h | 1 | ||||
-rw-r--r-- | libavcodec/x86/exrdsp.asm | 62 | ||||
-rw-r--r-- | libavcodec/x86/exrdsp_init.c | 13 | ||||
-rw-r--r-- | tests/checkasm/exrdsp.c | 23 |
6 files changed, 109 insertions, 15 deletions
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 230d5bbca8..0b755db3cb 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v)
     return (v + (1 << 16)) >> (exp + 1);
 }
 
-static void predictor(uint8_t *src, int size)
-{
-    uint8_t *t = src + 1;
-    uint8_t *stop = src + size;
-
-    while (t < stop) {
-        int d = (int) t[-1] + (int) t[0] - 128;
-        t[0] = d;
-        ++t;
-    }
-}
-
 static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
                           int uncompressed_size, EXRThreadData *td)
 {
@@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size
 
     av_assert1(uncompressed_size % 2 == 0);
 
-    predictor(td->tmp, uncompressed_size);
+    s->dsp.predictor(td->tmp, uncompressed_size);
     s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
 
     return 0;
@@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si
 
     av_assert1(uncompressed_size % 2 == 0);
 
-    predictor(td->tmp, uncompressed_size);
+    ctx->dsp.predictor(td->tmp, uncompressed_size);
     ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
 
     return 0;
diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c
index 871b6f1276..42dbf1f54a 100644
--- a/libavcodec/exrdsp.c
+++ b/libavcodec/exrdsp.c
@@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
     }
 }
 
+static void predictor_scalar(uint8_t *src, ptrdiff_t size)
+{
+    ptrdiff_t i;
+
+    for (i = 1; i < size; i++)
+        src[i] += src[i-1] - 128;
+}
+
 av_cold void ff_exrdsp_init(ExrDSPContext *c)
 {
     c->reorder_pixels = reorder_pixels_scalar;
+    c->predictor = predictor_scalar;
 
     if (ARCH_X86)
         ff_exrdsp_init_x86(c);
diff --git a/libavcodec/exrdsp.h b/libavcodec/exrdsp.h
index d8cb002efc..2c4dc3af88 100644
--- a/libavcodec/exrdsp.h
+++ b/libavcodec/exrdsp.h
@@ -24,6 +24,7 @@
 
 typedef struct ExrDSPContext {
     void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
+    void (*predictor)(uint8_t *src, ptrdiff_t size);
 } ExrDSPContext;
 
 void ff_exrdsp_init(ExrDSPContext *c);
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index 06c629e59e..23c9397ef8 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm
@@ -2,9 +2,11 @@
 ;* X86 Optimized functions for Open Exr Decoder
 ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
 ;*
-;* reorder_pixels based on patch by John Loy
+;* reorder_pixels, predictor based on patch by John Loy
 ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
 ;*
+;* predictor AVX/AVX2 by Henrik Gramner
+;*
 ;* This file is part of FFmpeg.
 ;*
 ;* FFmpeg is free software; you can redistribute it and/or
@@ -24,6 +26,9 @@
 
 %include "libavutil/x86/x86util.asm"
 
+cextern pb_15
+cextern pb_80
+
 SECTION .text
 
 ;------------------------------------------------------------------------------
@@ -60,3 +65,58 @@ REORDER_PIXELS
 INIT_YMM avx2
 REORDER_PIXELS
 %endif
+
+
+;------------------------------------------------------------------------------
+; void ff_predictor(uint8_t *src, ptrdiff_t size);
+;------------------------------------------------------------------------------
+
+%macro PREDICTOR 0
+cglobal predictor, 2,2,5, src, size
+%if mmsize == 32
+    vbroadcasti128 m0, [pb_80]
+%else
+    mova xm0, [pb_80]
+%endif
+    mova xm1, [pb_15]
+    mova xm2, xm0
+    add srcq, sizeq
+    neg sizeq
+.loop:
+    pxor m3, m0, [srcq + sizeq]
+    pslldq m4, m3, 1
+    paddb m3, m4
+    pslldq m4, m3, 2
+    paddb m3, m4
+    pslldq m4, m3, 4
+    paddb m3, m4
+    pslldq m4, m3, 8
+%if mmsize == 32
+    paddb m3, m4
+    paddb xm2, xm3
+    vextracti128 xm4, m3, 1
+    mova [srcq + sizeq], xm2
+    pshufb xm2, xm1
+    paddb xm2, xm4
+    mova [srcq + sizeq + 16], xm2
+%else
+    paddb m2, m3
+    paddb m2, m4
+    mova [srcq + sizeq], m2
+%endif
+    pshufb xm2, xm1
+    add sizeq, mmsize
+    jl .loop
+    RET
+%endmacro
+
+INIT_XMM ssse3
+PREDICTOR
+
+INIT_XMM avx
+PREDICTOR
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+PREDICTOR
+%endif
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
index 5669be3d97..63b3480d8f 100644
--- a/libavcodec/x86/exrdsp_init.c
+++ b/libavcodec/x86/exrdsp_init.c
@@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
 void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
+void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
+
 av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_sse2;
     }
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        dsp->predictor = ff_predictor_ssse3;
+    }
+    if (EXTERNAL_AVX(cpu_flags)) {
+        dsp->predictor = ff_predictor_avx;
+    }
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_avx2;
+        dsp->predictor = ff_predictor_avx2;
     }
 }
diff --git a/tests/checkasm/exrdsp.c b/tests/checkasm/exrdsp.c
index 6637f6fdd2..754a079f83 100644
--- a/tests/checkasm/exrdsp.c
+++ b/tests/checkasm/exrdsp.c
@@ -55,6 +55,24 @@ static void check_reorder_pixels(void) {
     bench_new(dst_new, src, BUF_SIZE);
 }
 
+static void check_predictor(void) {
+    LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
+
+    declare_func(void, uint8_t *src, ptrdiff_t size);
+
+    memset(src, 0, PADDED_BUF_SIZE);
+    randomize_buffers();
+    memcpy(dst_ref, src, PADDED_BUF_SIZE);
+    memcpy(dst_new, src, PADDED_BUF_SIZE);
+    call_ref(dst_ref, BUF_SIZE);
+    call_new(dst_new, BUF_SIZE);
+    if (memcmp(dst_ref, dst_new, BUF_SIZE))
+        fail();
+    bench_new(dst_new, BUF_SIZE);
+}
+
 void checkasm_check_exrdsp(void)
 {
     ExrDSPContext h;
@@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void)
         check_reorder_pixels();
 
     report("reorder_pixels");
+
+    if (check_func(h.predictor, "predictor"))
+        check_predictor();
+
+    report("predictor");
 }