aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ben Avison <bavison@riscosopen.org> 2013-08-05 13:12:47 +0100
committer Martin Storsjö <martin@martin.st> 2013-08-08 12:08:30 +0300
commit 218d6844b37d339ffbf2044ad07d8be7767e2734 (patch)
tree bc01481714ca5d7550ecb00d454a4c220cf1e959
parent 7a82022ee2f9b1fad991ace0936901e7419444be (diff)
download ffmpeg-218d6844b37d339ffbf2044ad07d8be7767e2734.tar.gz
h264dsp: Factorize code into a new function, h264_find_start_code_candidate
This performs the start code search which was previously part of
h264_find_frame_end() - the most CPU intensive part of the function.

By itself, this results in a performance regression:

               Before            After
               Mean    StdDev    Mean    StdDev   Change
Overall time   2925.6  26.2      3068.5  31.7     -4.7%

but this can more than be made up for by platform-optimised
implementations of the function.

Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- libavcodec/h264_parser.c 27
-rw-r--r-- libavcodec/h264dsp.c 29
-rw-r--r-- libavcodec/h264dsp.h 9
3 files changed, 41 insertions, 24 deletions
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index da2a5f99db..ef5da98934 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
for (i = 0; i < buf_size; i++) {
if (state == 7) {
-#if HAVE_FAST_UNALIGNED
- /* we check i < buf_size instead of i + 3 / 7 because it is
- * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
- * bytes at the end.
- */
-#if HAVE_FAST_64BIT
- while (i < buf_size &&
- !((~*(const uint64_t *)(buf + i) &
- (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
- 0x8080808080808080ULL))
- i += 8;
-#else
- while (i < buf_size &&
- !((~*(const uint32_t *)(buf + i) &
- (*(const uint32_t *)(buf + i) - 0x01010101U)) &
- 0x80808080U))
- i += 4;
-#endif
-#endif
- for (; i < buf_size; i++)
- if (!buf[i]) {
- state = 2;
- break;
- }
+ i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
+ if (i < buf_size)
+ state = 2;
} else if (state <= 2) {
if (buf[i] == 1)
state ^= 5; // 2->7, 1->4, 0->5
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 3ca6abefda..a901dbb9e1 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -53,6 +53,34 @@
#include "h264addpx_template.c"
#undef BIT_DEPTH
+static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
+{
+ int i = 0;
+#if HAVE_FAST_UNALIGNED
+ /* Fast path: skip whole words that contain no zero byte. Only i < size
+ * is checked (rather than i + 3 / i + 7) because it is simpler and
+ * there must be FF_INPUT_BUFFER_PADDING_SIZE bytes at the end.
+ */
+#if HAVE_FAST_64BIT
+ while (i < size &&
+ !((~*(const uint64_t *)(buf + i) &
+ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
+ 0x8080808080808080ULL)) /* mask is nonzero iff some byte of the word is 0 */
+ i += 8; /* none of these 8 bytes is zero */
+#else
+ while (i < size &&
+ !((~*(const uint32_t *)(buf + i) &
+ (*(const uint32_t *)(buf + i) - 0x01010101U)) &
+ 0x80808080U)) /* mask is nonzero iff some byte of the word is 0 */
+ i += 4; /* none of these 4 bytes is zero */
+#endif
+#endif
+ for (; i < size; i++) /* byte-wise scan for the first zero byte */
+ if (!buf[i])
+ break;
+ return i; /* index of the zero byte found, or >= size if there is none */
+}
+
av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc)
{
@@ -133,6 +161,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
H264_DSP(8);
break;
}
+ c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
if (ARCH_PPC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 1f9f8fe823..6249ba70d5 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -105,6 +105,15 @@ typedef struct H264DSPContext {
/* bypass-transform */
void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+
+ /**
+ * Search buf from the start for up to size bytes. Return the index
+ * of a zero byte, or >= size if not found. Ideally, use lookahead
+ * to filter out any zero bytes that are known to not be followed by
+ * one or more further zero bytes and a one byte. Better still, filter
+ * out any bytes that form the trailing_zero_8bits syntax element too.
+ */
+ int (*h264_find_start_code_candidate)(const uint8_t *buf, int size);
} H264DSPContext;
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,