diff options
author | Martin Storsjö <martin@martin.st> | 2020-05-12 11:22:45 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2020-05-13 13:20:08 +0300 |
commit | 353aecbb28e3976b6f4d7a4262398852cd67b5a2 (patch) | |
tree | d76f7b9f9a5349f1ecbd4b35340427a849aa091a | |
parent | b12b05374f7025167e2c43449ceb8ba3f0a6083f (diff) | |
download | ffmpeg-353aecbb28e3976b6f4d7a4262398852cd67b5a2.tar.gz |
pixblockdsp, avdct: Add get_pixels_unaligned
Use this in vf_spp.c, where the get_pixels operation is done on
unaligned source addresses.
Hook up the x86 (mmx and sse) versions of get_pixels to this
function pointer, as those implementations seem to support unaligned
use.
This fixes fate-filter-spp on armv7.
Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- | libavcodec/avdct.c | 1 | ||||
-rw-r--r-- | libavcodec/avdct.h | 4 | ||||
-rw-r--r-- | libavcodec/pixblockdsp.c | 2 | ||||
-rw-r--r-- | libavcodec/pixblockdsp.h | 3 | ||||
-rw-r--r-- | libavcodec/x86/pixblockdsp_init.c | 8 | ||||
-rw-r--r-- | libavfilter/vf_spp.c | 2 |
6 files changed, 17 insertions, 3 deletions
diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c
index 7c761cf39a..e8fa41f73b 100644
--- a/libavcodec/avdct.c
+++ b/libavcodec/avdct.c
@@ -120,6 +120,7 @@ int avcodec_dct_init(AVDCT *dsp)
         PixblockDSPContext pdsp;
         ff_pixblockdsp_init(&pdsp, avctx);
         COPY(pdsp, get_pixels);
+        COPY(pdsp, get_pixels_unaligned);
     }
 #endif
 
diff --git a/libavcodec/avdct.h b/libavcodec/avdct.h
index 272422e44c..6411fab6f6 100644
--- a/libavcodec/avdct.h
+++ b/libavcodec/avdct.h
@@ -67,6 +67,10 @@ typedef struct AVDCT {
                        ptrdiff_t line_size);
 
     int bits_per_sample;
+
+    void (*get_pixels_unaligned)(int16_t *block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t line_size);
 } AVDCT;
 
 /**
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 50e1d1d735..a79e547776 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -90,10 +90,12 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
     case 10:
     case 12:
     case 14:
+        c->get_pixels_unaligned =
         c->get_pixels = get_pixels_16_c;
         break;
     default:
         if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+            c->get_pixels_unaligned =
             c->get_pixels = get_pixels_8_c;
         }
         break;
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index e036700ff0..fddb467212 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -29,6 +29,9 @@ typedef struct PixblockDSPContext {
     void (*get_pixels)(int16_t *av_restrict block /* align 16 */,
                        const uint8_t *pixels /* align 8 */,
                        ptrdiff_t stride);
+    void (*get_pixels_unaligned)(int16_t *av_restrict block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t stride);
     void (*diff_pixels)(int16_t *av_restrict block /* align 16 */,
                         const uint8_t *s1 /* align 8 */,
                         const uint8_t *s2 /* align 8 */,
diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c
index ade55e01a3..3a5eb6959c 100644
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -37,15 +37,19 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
     int cpu_flags = av_get_cpu_flags();
 
     if (EXTERNAL_MMX(cpu_flags)) {
-        if (!high_bit_depth)
+        if (!high_bit_depth) {
+            c->get_pixels_unaligned =
             c->get_pixels = ff_get_pixels_mmx;
+        }
         c->diff_pixels_unaligned =
         c->diff_pixels = ff_diff_pixels_mmx;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
-        if (!high_bit_depth)
+        if (!high_bit_depth) {
+            c->get_pixels_unaligned =
             c->get_pixels = ff_get_pixels_sse2;
+        }
         c->diff_pixels_unaligned =
         c->diff_pixels = ff_diff_pixels_sse2;
     }
diff --git a/libavfilter/vf_spp.c b/libavfilter/vf_spp.c
index 6bee91b309..a83b1195c0 100644
--- a/libavfilter/vf_spp.c
+++ b/libavfilter/vf_spp.c
@@ -283,7 +283,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src,
                 const int x1 = x + offset[i + count - 1][0];
                 const int y1 = y + offset[i + count - 1][1];
                 const int index = x1 + y1*linesize;
-                p->dct->get_pixels(block, p->src + sample_bytes*index, sample_bytes*linesize);
+                p->dct->get_pixels_unaligned(block, p->src + sample_bytes*index, sample_bytes*linesize);
                 p->dct->fdct(block);
                 p->requantize(block2, block, qp, p->dct->idct_permutation);
                 p->dct->idct(block2);