aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2020-05-12 11:22:45 +0300
committer: Martin Storsjö <martin@martin.st> 2020-05-13 13:20:08 +0300
commit: 353aecbb28e3976b6f4d7a4262398852cd67b5a2 (patch)
tree: d76f7b9f9a5349f1ecbd4b35340427a849aa091a
parent: b12b05374f7025167e2c43449ceb8ba3f0a6083f (diff)
download: ffmpeg-353aecbb28e3976b6f4d7a4262398852cd67b5a2.tar.gz
pixblockdsp, avdct: Add get_pixels_unaligned
Use this in vf_spp.c, where the get_pixels operation is done on unaligned source addresses.

Hook up the x86 (mmx and sse2) versions of get_pixels to this function pointer, as those implementations seem to support unaligned use.

This fixes fate-filter-spp on armv7.

Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r--  libavcodec/avdct.c                 1
-rw-r--r--  libavcodec/avdct.h                 4
-rw-r--r--  libavcodec/pixblockdsp.c           2
-rw-r--r--  libavcodec/pixblockdsp.h           3
-rw-r--r--  libavcodec/x86/pixblockdsp_init.c  8
-rw-r--r--  libavfilter/vf_spp.c               2
6 files changed, 17 insertions, 3 deletions
diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c
index 7c761cf39a..e8fa41f73b 100644
--- a/libavcodec/avdct.c
+++ b/libavcodec/avdct.c
@@ -120,6 +120,7 @@ int avcodec_dct_init(AVDCT *dsp)
PixblockDSPContext pdsp;
ff_pixblockdsp_init(&pdsp, avctx);
COPY(pdsp, get_pixels);
+ COPY(pdsp, get_pixels_unaligned);
}
#endif
diff --git a/libavcodec/avdct.h b/libavcodec/avdct.h
index 272422e44c..6411fab6f6 100644
--- a/libavcodec/avdct.h
+++ b/libavcodec/avdct.h
@@ -67,6 +67,10 @@ typedef struct AVDCT {
ptrdiff_t line_size);
int bits_per_sample;
+
+ void (*get_pixels_unaligned)(int16_t *block /* align 16 */,
+ const uint8_t *pixels,
+ ptrdiff_t line_size);
} AVDCT;
/**
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 50e1d1d735..a79e547776 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -90,10 +90,12 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
case 10:
case 12:
case 14:
+ c->get_pixels_unaligned =
c->get_pixels = get_pixels_16_c;
break;
default:
if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+ c->get_pixels_unaligned =
c->get_pixels = get_pixels_8_c;
}
break;
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index e036700ff0..fddb467212 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -29,6 +29,9 @@ typedef struct PixblockDSPContext {
void (*get_pixels)(int16_t *av_restrict block /* align 16 */,
const uint8_t *pixels /* align 8 */,
ptrdiff_t stride);
+ void (*get_pixels_unaligned)(int16_t *av_restrict block /* align 16 */,
+ const uint8_t *pixels,
+ ptrdiff_t stride);
void (*diff_pixels)(int16_t *av_restrict block /* align 16 */,
const uint8_t *s1 /* align 8 */,
const uint8_t *s2 /* align 8 */,
diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c
index ade55e01a3..3a5eb6959c 100644
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -37,15 +37,19 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) {
- if (!high_bit_depth)
+ if (!high_bit_depth) {
+ c->get_pixels_unaligned =
c->get_pixels = ff_get_pixels_mmx;
+ }
c->diff_pixels_unaligned =
c->diff_pixels = ff_diff_pixels_mmx;
}
if (EXTERNAL_SSE2(cpu_flags)) {
- if (!high_bit_depth)
+ if (!high_bit_depth) {
+ c->get_pixels_unaligned =
c->get_pixels = ff_get_pixels_sse2;
+ }
c->diff_pixels_unaligned =
c->diff_pixels = ff_diff_pixels_sse2;
}
diff --git a/libavfilter/vf_spp.c b/libavfilter/vf_spp.c
index 6bee91b309..a83b1195c0 100644
--- a/libavfilter/vf_spp.c
+++ b/libavfilter/vf_spp.c
@@ -283,7 +283,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src,
const int x1 = x + offset[i + count - 1][0];
const int y1 = y + offset[i + count - 1][1];
const int index = x1 + y1*linesize;
- p->dct->get_pixels(block, p->src + sample_bytes*index, sample_bytes*linesize);
+ p->dct->get_pixels_unaligned(block, p->src + sample_bytes*index, sample_bytes*linesize);
p->dct->fdct(block);
p->requantize(block2, block, qp, p->dct->idct_permutation);
p->dct->idct(block2);