diff options
author | Martin Storsjö <martin@martin.st> | 2019-03-12 11:49:18 +0200 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2019-03-21 22:03:46 +0200 |
commit | 0676de935b1e81bc5b5698fef3e7d48ff2ea77ff (patch) | |
tree | 498c3b42c7fba8c35a11f6445c2374481b037af3 | |
parent | f8abf7d4dfa0504f7f65e4f1fd9d22e01cb371cc (diff) | |
download | ffmpeg-0676de935b1e81bc5b5698fef3e7d48ff2ea77ff.tar.gz |
arm: Implement a NEON version of 422 h264_h_loop_filter_chroma
Previously, the 420 version was used even for 422.
This fixes occasional checkasm failures.
Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- | libavcodec/arm/h264dsp_init_arm.c | 8 | ||||
-rw-r--r-- | libavcodec/arm/h264dsp_neon.S | 19 |
2 files changed, 26 insertions, 1 deletion
```diff
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 7afd350890..617632c59e 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -33,6 +33,8 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
                                        int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
                                        int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, int stride, int alpha,
+                                          int beta, int8_t *tc0);
 
 void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
                                    int log2_den, int weight, int offset);
@@ -76,7 +78,11 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
     c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
     c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
     c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
-    c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+
+    if (chroma_format_idc <= 1)
+        c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+    else
+        c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
 
     c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
     c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 5e75565b3e..783e0f6580 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -237,6 +237,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
         h264_loop_filter_start
 
         sub             r0, r0, #2
+h_loop_filter_chroma420:
         vld1.32         {d18[0]}, [r0], r1
         vld1.32         {d16[0]}, [r0], r1
         vld1.32         {d0[0]}, [r0], r1
@@ -271,6 +272,24 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
         bx              lr
 endfunc
 
+function ff_h264_h_loop_filter_chroma422_neon, export=1
+        h264_loop_filter_start
+        push            {r4, lr}
+        add             r4, r0, r1
+        add             r1, r1, r1
+        sub             r0, r0, #2
+
+        bl              h_loop_filter_chroma420
+
+        ldr             r12, [sp, #8]
+        ldr             r12, [r12]
+        vmov.32         d24[0], r12
+        sub             r0, r4, #2
+
+        bl              h_loop_filter_chroma420
+        pop             {r4, pc}
+endfunc
+
 @ Biweighted prediction
 
 .macro biweight_16 macs, macd
```