diff options
author | Timothy Gu <timothygu99@gmail.com> | 2015-09-29 16:50:02 -0700 |
---|---|---|
committer | Timothy Gu <timothygu99@gmail.com> | 2015-10-20 12:36:29 -0700 |
commit | bc22cd244e4c04fb304cfc4fcccb650abbd29ef2 (patch) | |
tree | 27f1de29fad8a5121f26377b432cf7e2e3502118 | |
parent | 00ae5b401b24592a9f7019baada5b349152ee2fc (diff) | |
download | ffmpeg-bc22cd244e4c04fb304cfc4fcccb650abbd29ef2.tar.gz |
dnxhdenc: Optimize get_pixels_8x4_sym for 10-bit
This reverts commit 628e6d0164febc8e69b0f10dfa487e8a2dd1a28a and uses
a better fix.
Before:
4483 decicycles in get_pixels_8x4_sym, 131032 runs, 40 skips
After:
2569 decicycles in get_pixels_8x4_sym, 131054 runs, 18 skips
-rw-r--r-- | libavcodec/dnxhdenc.c | 24 |
1 files changed, 8 insertions, 16 deletions
```diff
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 36154acb27..cad4fcff8d 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -87,22 +87,14 @@ void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block,
                                     const uint8_t *pixels,
                                     ptrdiff_t line_size)
 {
-    int i;
-    const uint16_t* pixels16 = (const uint16_t*)pixels;
-    line_size >>= 1;
-
-    for (i = 0; i < 4; i++) {
-        block[0] = pixels16[0]; block[1] = pixels16[1];
-        block[2] = pixels16[2]; block[3] = pixels16[3];
-        block[4] = pixels16[4]; block[5] = pixels16[5];
-        block[6] = pixels16[6]; block[7] = pixels16[7];
-        pixels16 += line_size;
-        block += 8;
-    }
-    memcpy(block,      block -  8, sizeof(*block) * 8);
-    memcpy(block +  8, block - 16, sizeof(*block) * 8);
-    memcpy(block + 16, block - 24, sizeof(*block) * 8);
-    memcpy(block + 24, block - 32, sizeof(*block) * 8);
+    memcpy(block + 0 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
+    memcpy(block + 7 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
+    memcpy(block + 1 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
+    memcpy(block + 6 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
+    memcpy(block + 2 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
+    memcpy(block + 5 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
+    memcpy(block + 3 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
+    memcpy(block + 4 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
 }
 
 static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
```