aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Vignali <martin.vignali@gmail.com> 2017-10-01 21:37:15 +0200
committer James Almer <jamrial@gmail.com> 2017-10-01 17:35:30 -0300
commit ac5908b13f16cbda396730c35f5f3125ca24577a (patch)
tree 01f49a554ed8199b7aaa5d095c859796c6c10c18
parent 59924d5eb11646f82f70c206be8a867468f102b9 (diff)
download ffmpeg-ac5908b13f16cbda396730c35f5f3125ca24577a.tar.gz
libavcodec/exr : add x86 SIMD for predictor
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/exr.c 16
-rw-r--r-- libavcodec/exrdsp.c 9
-rw-r--r-- libavcodec/exrdsp.h 1
-rw-r--r-- libavcodec/x86/exrdsp.asm 62
-rw-r--r-- libavcodec/x86/exrdsp_init.c 13
-rw-r--r-- tests/checkasm/exrdsp.c 23
6 files changed, 109 insertions, 15 deletions
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 230d5bbca8..0b755db3cb 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v)
return (v + (1 << 16)) >> (exp + 1);
}
-static void predictor(uint8_t *src, int size)
-{
- uint8_t *t = src + 1;
- uint8_t *stop = src + size;
-
- while (t < stop) {
- int d = (int) t[-1] + (int) t[0] - 128;
- t[0] = d;
- ++t;
- }
-}
-
static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
int uncompressed_size, EXRThreadData *td)
{
@@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size
av_assert1(uncompressed_size % 2 == 0);
- predictor(td->tmp, uncompressed_size);
+ s->dsp.predictor(td->tmp, uncompressed_size);
s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
return 0;
@@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si
av_assert1(uncompressed_size % 2 == 0);
- predictor(td->tmp, uncompressed_size);
+ ctx->dsp.predictor(td->tmp, uncompressed_size);
ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
return 0;
diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c
index 871b6f1276..42dbf1f54a 100644
--- a/libavcodec/exrdsp.c
+++ b/libavcodec/exrdsp.c
@@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
}
}
+static void predictor_scalar(uint8_t *src, ptrdiff_t size) /* in-place running sum: each byte becomes itself + the already-updated previous byte - 128 */
+{
+ ptrdiff_t i;
+
+ for (i = 1; i < size; i++) /* starts at 1, so src[0] is left unchanged; the loop does nothing when size <= 1 */
+ src[i] += src[i-1] - 128; /* the sum is stored back into a uint8_t, i.e. reduced modulo 256 */
+}
+
av_cold void ff_exrdsp_init(ExrDSPContext *c)
{
c->reorder_pixels = reorder_pixels_scalar;
+ c->predictor = predictor_scalar;
if (ARCH_X86)
ff_exrdsp_init_x86(c);
diff --git a/libavcodec/exrdsp.h b/libavcodec/exrdsp.h
index d8cb002efc..2c4dc3af88 100644
--- a/libavcodec/exrdsp.h
+++ b/libavcodec/exrdsp.h
@@ -24,6 +24,7 @@
typedef struct ExrDSPContext { /* table of function pointers filled in by ff_exrdsp_init() */
void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
+ void (*predictor)(uint8_t *src, ptrdiff_t size); /* operates in place on the size bytes at src (no separate destination) */
} ExrDSPContext;
void ff_exrdsp_init(ExrDSPContext *c);
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index 06c629e59e..23c9397ef8 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm
@@ -2,9 +2,11 @@
;* X86 Optimized functions for Open Exr Decoder
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
;*
-;* reorder_pixels based on patch by John Loy
+;* reorder_pixels, predictor based on patch by John Loy
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
;*
+;* predictor AVX/AVX2 by Henrik Gramner
+;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
@@ -24,6 +26,9 @@
%include "libavutil/x86/x86util.asm"
+cextern pb_15
+cextern pb_80
+
SECTION .text
;------------------------------------------------------------------------------
@@ -60,3 +65,58 @@ REORDER_PIXELS
INIT_YMM avx2
REORDER_PIXELS
%endif
+
+
+;------------------------------------------------------------------------------
+; void ff_predictor(uint8_t *src, ptrdiff_t size);
+;------------------------------------------------------------------------------
+
+%macro PREDICTOR 0 ; emits one predictor function for the instruction set selected by the preceding INIT_XMM/INIT_YMM
+cglobal predictor, 2,2,5, src, size ; two arguments (src, size); vector registers m0-m4 are used below
+%if mmsize == 32
+ vbroadcasti128 m0, [pb_80] ; YMM build: same 16-byte constant in both 128-bit lanes
+%else
+ mova xm0, [pb_80] ; presumably sixteen 0x80 bytes (defined elsewhere) - TODO confirm
+%endif
+ mova xm1, [pb_15] ; pshufb mask; presumably all bytes = 15 so it broadcasts byte 15 - TODO confirm
+ mova xm2, xm0 ; xm2 = running carry, seeded with the pb_80 constant
+ add srcq, sizeq ; srcq now points at the end of the buffer
+ neg sizeq ; sizeq becomes a negative offset that counts up towards 0
+.loop:
+ pxor m3, m0, [srcq + sizeq] ; load mmsize bytes and XOR with pb_80 (equals -128 mod 256 if pb_80 is 0x80 bytes)
+ pslldq m4, m3, 1 ; shift-and-add by 1, 2, 4, 8 bytes: byte-wise prefix sum within each 128-bit lane
+ paddb m3, m4
+ pslldq m4, m3, 2
+ paddb m3, m4
+ pslldq m4, m3, 4
+ paddb m3, m4
+ pslldq m4, m3, 8
+%if mmsize == 32
+ paddb m3, m4 ; finish the per-lane prefix sums
+ paddb xm2, xm3 ; low lane: add the carry from the previous 16 bytes
+ vextracti128 xm4, m3, 1 ; xm4 = prefix sums of the high lane
+ mova [srcq + sizeq], xm2 ; store the first 16 result bytes
+ pshufb xm2, xm1 ; carry for the high lane, taken from the low-lane result via the pb_15 mask
+ paddb xm2, xm4
+ mova [srcq + sizeq + 16], xm2 ; store the second 16 result bytes
+%else
+ paddb m2, m3 ; carry + partial sums ...
+ paddb m2, m4 ; ... + the final 8-byte shift step
+ mova [srcq + sizeq], m2 ; store 16 result bytes in place
+%endif
+ pshufb xm2, xm1 ; carry into the next iteration, derived from the block just stored
+ add sizeq, mmsize ; advance by one full vector
+ jl .loop ; loop while the offset is still negative; the body always runs at least once and always handles whole mmsize-byte chunks
+ RET
+%endmacro
+
+INIT_XMM ssse3 ; 16-byte vectors (mmsize == 16 path above)
+PREDICTOR
+
+INIT_XMM avx ; same 16-byte path, built for AVX
+PREDICTOR
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2 ; 32-byte vectors (mmsize == 32 path above)
+PREDICTOR
+%endif
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
index 5669be3d97..63b3480d8f 100644
--- a/libavcodec/x86/exrdsp_init.c
+++ b/libavcodec/x86/exrdsp_init.c
@@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
+void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
+
av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
{
int cpu_flags = av_get_cpu_flags();
@@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
if (EXTERNAL_SSE2(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_sse2;
}
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ dsp->predictor = ff_predictor_ssse3;
+ }
+ if (EXTERNAL_AVX(cpu_flags)) {
+ dsp->predictor = ff_predictor_avx;
+ }
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_avx2;
+ dsp->predictor = ff_predictor_avx2;
}
}
diff --git a/tests/checkasm/exrdsp.c b/tests/checkasm/exrdsp.c
index 6637f6fdd2..754a079f83 100644
--- a/tests/checkasm/exrdsp.c
+++ b/tests/checkasm/exrdsp.c
@@ -55,6 +55,24 @@ static void check_reorder_pixels(void) {
bench_new(dst_new, src, BUF_SIZE);
}
+static void check_predictor(void) { /* runs the reference and the new predictor on identical input and compares the results */
+ LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
+ LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
+ LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
+
+ declare_func(void, uint8_t *src, ptrdiff_t size); /* signature matches ExrDSPContext.predictor */
+
+ memset(src, 0, PADDED_BUF_SIZE);
+ randomize_buffers();
+ memcpy(dst_ref, src, PADDED_BUF_SIZE); /* the function takes a single buffer, so ref and new each get their own copy of src */
+ memcpy(dst_new, src, PADDED_BUF_SIZE);
+ call_ref(dst_ref, BUF_SIZE);
+ call_new(dst_new, BUF_SIZE);
+ if (memcmp(dst_ref, dst_new, BUF_SIZE)) /* only the first BUF_SIZE bytes are compared; the padding is not checked */
+ fail();
+ bench_new(dst_new, BUF_SIZE); /* dst_new was already transformed by call_new above and is reused as benchmark input */
+}
+
void checkasm_check_exrdsp(void)
{
ExrDSPContext h;
@@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void)
check_reorder_pixels();
report("reorder_pixels");
+
+ if (check_func(h.predictor, "predictor"))
+ check_predictor();
+
+ report("predictor");
}