diff options
author | Mans Rullgard <mans@mansr.com> | 2012-10-26 14:42:23 +0100 |
---|---|---|
committer | Mans Rullgard <mans@mansr.com> | 2012-11-23 12:04:17 +0000 |
commit | 4a606c830ae664013cea33800094d4d0f4ec62da (patch) | |
tree | a4cd85bfb69d4c61ea1a09c96fba09d2dfb8f1d1 /libavutil/mem.c | |
parent | 5e39bb073a1d6fc6ac30b19893beefdd3c7d633f (diff) | |
download | ffmpeg-4a606c830ae664013cea33800094d4d0f4ec62da.tar.gz |
av_memcpy_backptr: optimise some special cases
- Add special cases for offsets of 2, 3, or 4 bytes. This means the
offset is always >4 in the generic case, allowing 32-bit copies to
be used there.
- Don't use memcpy() for sizes less than 16 bytes.
Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavutil/mem.c')
-rw-r--r-- | libavutil/mem.c | 120 |
1 files changed, 103 insertions, 17 deletions
diff --git a/libavutil/mem.c b/libavutil/mem.c index feba3163b0..b6c0b29319 100644 --- a/libavutil/mem.c +++ b/libavutil/mem.c @@ -180,29 +180,93 @@ char *av_strdup(const char *s) return ptr; } +static void fill16(uint8_t *dst, int len) +{ + uint32_t v = AV_RN16(dst - 2); + + v |= v << 16; + + while (len >= 4) { + AV_WN32(dst, v); + dst += 4; + len -= 4; + } + + while (len--) { + *dst = dst[-2]; + dst++; + } +} + +static void fill24(uint8_t *dst, int len) +{ +#if HAVE_BIGENDIAN + uint32_t v = AV_RB24(dst - 3); + uint32_t a = v << 8 | v >> 16; + uint32_t b = v << 16 | v >> 8; + uint32_t c = v << 24 | v; +#else + uint32_t v = AV_RL24(dst - 3); + uint32_t a = v | v << 24; + uint32_t b = v >> 8 | v << 16; + uint32_t c = v >> 16 | v << 8; +#endif + + while (len >= 12) { + AV_WN32(dst, a); + AV_WN32(dst + 4, b); + AV_WN32(dst + 8, c); + dst += 12; + len -= 12; + } + + if (len >= 4) { + AV_WN32(dst, a); + dst += 4; + len -= 4; + } + + if (len >= 4) { + AV_WN32(dst, b); + dst += 4; + len -= 4; + } + + while (len--) { + *dst = dst[-3]; + dst++; + } +} + +static void fill32(uint8_t *dst, int len) +{ + uint32_t v = AV_RN32(dst - 4); + + while (len >= 4) { + AV_WN32(dst, v); + dst += 4; + len -= 4; + } + + while (len--) { + *dst = dst[-4]; + dst++; + } +} + void av_memcpy_backptr(uint8_t *dst, int back, int cnt) { const uint8_t *src = &dst[-back]; if (back == 1) { memset(dst, *src, cnt); + } else if (back == 2) { + fill16(dst, cnt); + } else if (back == 3) { + fill24(dst, cnt); + } else if (back == 4) { + fill32(dst, cnt); } else { - if (cnt >= 4) { - AV_COPY16U(dst, src); - AV_COPY16U(dst + 2, src + 2); - src += 4; - dst += 4; - cnt -= 4; - } - if (cnt >= 8) { - AV_COPY16U(dst, src); - AV_COPY16U(dst + 2, src + 2); - AV_COPY16U(dst + 4, src + 4); - AV_COPY16U(dst + 6, src + 6); - src += 8; - dst += 8; - cnt -= 8; - } - if (cnt > 0) { + if (cnt >= 16) { int blocklen = back; while (cnt > blocklen) { memcpy(dst, src, blocklen); @@ -211,6 +275,28 @@ void av_memcpy_backptr(uint8_t *dst, int back, int cnt) blocklen <<= 1; } memcpy(dst, src, cnt); + return; + } + if (cnt >= 8) { + AV_COPY32U(dst, src); + AV_COPY32U(dst + 4, src + 4); + src += 8; + dst += 8; + cnt -= 8; + } + if (cnt >= 4) { + AV_COPY32U(dst, src); + src += 4; + dst += 4; + cnt -= 4; + } + if (cnt >= 2) { + AV_COPY16U(dst, src); + src += 2; + dst += 2; + cnt -= 2; } + if (cnt) + *dst = *src; } } |