diff options
author | Ben Avison <bavison@riscosopen.org> | 2022-03-31 18:23:45 +0100 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2022-04-01 10:03:33 +0300 |
commit | 2e268477802d64aa75b9c3c2cb2fc89d1ef7c87d (patch) | |
tree | 5b5ccbd62a2cb87808a197864b63f872a8ed9d99 | |
parent | bd3615a81a3387cafb51444927e852423f8f4a6e (diff) | |
download | ffmpeg-2e268477802d64aa75b9c3c2cb2fc89d1ef7c87d.tar.gz |
avcodec/vc1: Introduce fast path for unescaping bitstream buffer
Includes a checkasm test.
Signed-off-by: Ben Avison <bavison@riscosopen.org>
Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- | libavcodec/vc1dec.c | 20 | ||||
-rw-r--r-- | libavcodec/vc1dsp.c | 2 | ||||
-rw-r--r-- | libavcodec/vc1dsp.h | 3 | ||||
-rw-r--r-- | tests/checkasm/vc1dsp.c | 67 |
4 files changed, 82 insertions, 10 deletions
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index e279ffd1c1..0426e8a752 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -491,7 +491,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) size = next - start - 4; if (size <= 0) continue; - buf2_size = vc1_unescape_buffer(start + 4, size, buf2); + buf2_size = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2); init_get_bits(&gb, buf2, buf2_size * 8); switch (AV_RB32(start)) { case VC1_CODE_SEQHDR: @@ -681,7 +681,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, case VC1_CODE_FRAME: if (avctx->hwaccel) buf_start = start; - buf_size2 = vc1_unescape_buffer(start + 4, size, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2); break; case VC1_CODE_FIELD: { int buf_size3; @@ -698,8 +698,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR(ENOMEM); goto err; } - buf_size3 = vc1_unescape_buffer(start + 4, size, - slices[n_slices].buf); + buf_size3 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, + slices[n_slices].buf); init_get_bits(&slices[n_slices].gb, slices[n_slices].buf, buf_size3 << 3); slices[n_slices].mby_start = avctx->coded_height + 31 >> 5; @@ -710,7 +710,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, break; } case VC1_CODE_ENTRYPOINT: /* it should be before frame data */ - buf_size2 = vc1_unescape_buffer(start + 4, size, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2); init_get_bits(&s->gb, buf2, buf_size2 * 8); ff_vc1_decode_entry_point(avctx, v, &s->gb); break; @@ -727,8 +727,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR(ENOMEM); goto err; } - buf_size3 = vc1_unescape_buffer(start + 4, size, - slices[n_slices].buf); + buf_size3 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, + slices[n_slices].buf); init_get_bits(&slices[n_slices].gb, slices[n_slices].buf, buf_size3 << 3); slices[n_slices].mby_start = get_bits(&slices[n_slices].gb, 9); @@ -762,7 +762,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR(ENOMEM); goto err; } - buf_size3 = vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf); + buf_size3 = v->vc1dsp.vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf); init_get_bits(&slices[n_slices].gb, slices[n_slices].buf, buf_size3 << 3); slices[n_slices].mby_start = s->mb_height + 1 >> 1; @@ -771,9 +771,9 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, n_slices1 = n_slices - 1; n_slices++; } - buf_size2 = vc1_unescape_buffer(buf, divider - buf, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(buf, divider - buf, buf2); } else { - buf_size2 = vc1_unescape_buffer(buf, buf_size, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(buf, buf_size, buf2); } init_get_bits(&s->gb, buf2, buf_size2*8); } else{ diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index f651d7d461..f1b7bb2397 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -34,6 +34,7 @@ #include "rnd_avg.h" #include "vc1dsp.h" #include "startcode.h" +#include "vc1_common.h" /* Apply overlap transform to horizontal edge */ static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride) @@ -1030,6 +1031,7 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ dsp->startcode_find_candidate = ff_startcode_find_candidate_c; + dsp->vc1_unescape_buffer = vc1_unescape_buffer; if (ARCH_AARCH64) ff_vc1dsp_init_aarch64(dsp); diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h index fe60025a2a..7ed1776ca7 100644 --- a/libavcodec/vc1dsp.h +++ b/libavcodec/vc1dsp.h @@ -80,6 +80,9 @@ typedef struct VC1DSPContext { * one or more further zero bytes and a one byte. */ int (*startcode_find_candidate)(const uint8_t *buf, int size); + + /* Copy a buffer, removing startcode emulation escape bytes as we go */ + int (*vc1_unescape_buffer)(const uint8_t *src, int size, uint8_t *dst); } VC1DSPContext; void ff_vc1dsp_init(VC1DSPContext* c); diff --git a/tests/checkasm/vc1dsp.c b/tests/checkasm/vc1dsp.c index 7d4457306f..52628d15e4 100644 --- a/tests/checkasm/vc1dsp.c +++ b/tests/checkasm/vc1dsp.c @@ -374,6 +374,70 @@ static void check_loop_filter(void) } } +#define TEST_UNESCAPE \ + do { \ + for (int count = 100; count > 0; --count) { \ + escaped_offset = rnd() & 7; \ + unescaped_offset = rnd() & 7; \ + escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7); \ + RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE); \ + len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \ + len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \ + if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE)) \ + fail(); \ + } \ + } while (0) + +static void check_unescape(void) +{ + /* This appears to be a typical length of buffer in use */ +#define LOG2_UNESCAPE_BUF_SIZE 17 +#define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE) + LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]); + LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]); + LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]); + LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]); + + VC1DSPContext h; + + ff_vc1dsp_init(&h); + + if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) { + int len0, len1, escaped_offset, unescaped_offset, escaped_len; + declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *); + + /* Test data which consists of escapes sequences packed as tightly as possible */ + for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x) + escaped1[x] = escaped0[x] = 3 * (x % 3 == 0); + TEST_UNESCAPE; + + /* Test random data */ + RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE); + TEST_UNESCAPE; + + /* Test data with escape sequences at random intervals */ + for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) { + int gap, gap_msb; + escaped1[x+0] = escaped0[x+0] = 0; + escaped1[x+1] = escaped0[x+1] = 0; + escaped1[x+2] = escaped0[x+2] = 3; + escaped1[x+3] = escaped0[x+3] = rnd() & 3; + gap_msb = 2u << (rnd() % 8); + gap = (rnd() &~ -gap_msb) | gap_msb; + x += gap; + } + TEST_UNESCAPE; + + /* Test data which is known to contain no escape sequences */ + memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE); + memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE); + TEST_UNESCAPE; + + /* Benchmark the no-escape-sequences case */ + bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1); + } +} + void checkasm_check_vc1dsp(void) { check_inv_trans_inplace(); @@ -382,4 +446,7 @@ void checkasm_check_vc1dsp(void) check_loop_filter(); report("loop_filter"); + + check_unescape(); + report("unescape_buffer"); } |