diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-12-06 01:46:39 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-12-06 01:54:10 +0100 |
commit | 1d048f762d2fc4ede5316818f467e2f1d1065ac1 (patch) | |
tree | 3667fa5304938b04abb0b636f32ae7cfe0e55657 | |
parent | e827f65618f601ae2016e6f8ac8851534ed346c8 (diff) | |
parent | 9a738c27dceb4b975784b23213a46f5cb560d1c2 (diff) | |
download | ffmpeg-1d048f762d2fc4ede5316818f467e2f1d1065ac1.tar.gz |
Merge commit '9a738c27dceb4b975784b23213a46f5cb560d1c2'
* commit '9a738c27dceb4b975784b23213a46f5cb560d1c2':
  v210enc: Add SIMD optimised 8-bit and 10-bit encoders
Conflicts:
    libavcodec/v210enc.c
    libavcodec/v210enc.h
    libavcodec/x86/Makefile
    libavcodec/x86/v210enc.asm
    libavcodec/x86/v210enc_init.c
    tests/ref/vsynth/vsynth1-v210
    tests/ref/vsynth/vsynth2-v210
See: 36091742d182b3ad4411aae22682354b3834a974
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/v210enc.c | 40 |
-rw-r--r-- | libavcodec/v210enc.h | 6 |
-rw-r--r-- | libavcodec/x86/v210enc_init.c | 15 |
3 files changed, 33 insertions, 28 deletions
diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c index 0d40f99993..b17bc2c3e7 100644 --- a/libavcodec/v210enc.c +++ b/libavcodec/v210enc.c @@ -31,7 +31,7 @@ #define WRITE_PIXELS(a, b, c) \ do { \ - val = CLIP(*a++); \ + val = CLIP(*a++); \ val |= (CLIP(*b++) << 10) | \ (CLIP(*c++) << 20); \ AV_WL32(dst, val); \ @@ -40,21 +40,22 @@ #define WRITE_PIXELS8(a, b, c) \ do { \ - val = (CLIP8(*a++) << 2); \ - val |= (CLIP8(*b++) << 12) | \ - (CLIP8(*c++) << 22); \ + val = (CLIP8(*a++) << 2); \ + val |= (CLIP8(*b++) << 12) | \ + (CLIP8(*c++) << 22); \ AV_WL32(dst, val); \ dst += 4; \ } while (0) static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u, - const uint8_t *v, uint8_t *dst, ptrdiff_t width) + const uint8_t *v, uint8_t *dst, + ptrdiff_t width) { uint32_t val; int i; /* unroll this to match the assembly */ - for( i = 0; i < width-11; i += 12 ){ + for (i = 0; i < width - 11; i += 12) { WRITE_PIXELS8(u, y, v); WRITE_PIXELS8(y, u, y); WRITE_PIXELS8(v, y, u); @@ -67,12 +68,13 @@ static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u, } static void v210_planar_pack_10_c(const uint16_t *y, const uint16_t *u, - const uint16_t *v, uint8_t *dst, ptrdiff_t width) + const uint16_t *v, uint8_t *dst, + ptrdiff_t width) { uint32_t val; int i; - for( i = 0; i < width-5; i += 6 ){ + for (i = 0; i < width - 5; i += 6) { WRITE_PIXELS(u, y, v); WRITE_PIXELS(y, u, y); WRITE_PIXELS(v, y, u); @@ -95,8 +97,8 @@ static av_cold int encode_init(AVCodecContext *avctx) avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; - s->pack_line_8 = v210_planar_pack_8_c; - s->pack_line_10 = v210_planar_pack_10_c; + s->pack_line_8 = v210_planar_pack_8_c; + s->pack_line_10 = v210_planar_pack_10_c; if (ARCH_X86) ff_v210enc_init_x86(s); @@ -108,24 +110,23 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *pic, int *got_packet) { V210EncContext *s = avctx->priv_data; - int aligned_width = ((avctx->width + 47) / 48) * 48; int stride = aligned_width 
* 8 / 3; int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4; int h, w, ret; uint8_t *dst; - if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) { + ret = ff_alloc_packet(pkt, avctx->height * stride); + if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n"); return ret; } - dst = pkt->data; if (pic->format == AV_PIX_FMT_YUV422P10) { - const uint16_t *y = (const uint16_t*)pic->data[0]; - const uint16_t *u = (const uint16_t*)pic->data[1]; - const uint16_t *v = (const uint16_t*)pic->data[2]; + const uint16_t *y = (const uint16_t *)pic->data[0]; + const uint16_t *u = (const uint16_t *)pic->data[1]; + const uint16_t *v = (const uint16_t *)pic->data[2]; for (h = 0; h < avctx->height; h++) { uint32_t val; w = (avctx->width / 6) * 6; @@ -156,13 +157,11 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, memset(dst, 0, line_padding); dst += line_padding; - y += pic->linesize[0] / 2 - avctx->width; u += pic->linesize[1] / 2 - avctx->width / 2; v += pic->linesize[2] / 2 - avctx->width / 2; } - } - else if(pic->format == AV_PIX_FMT_YUV422P) { + } else if(pic->format == AV_PIX_FMT_YUV422P) { const uint8_t *y = pic->data[0]; const uint8_t *u = pic->data[1]; const uint8_t *v = pic->data[2]; @@ -176,7 +175,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, v += w >> 1; dst += (w / 12) * 32; - for( ; w < avctx->width-5; w += 6 ){ + for (; w < avctx->width - 5; w += 6) { WRITE_PIXELS8(u, y, v); WRITE_PIXELS8(y, u, y); WRITE_PIXELS8(v, y, u); @@ -200,7 +199,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, AV_WL32(dst, val); dst += 4; } - memset(dst, 0, line_padding); dst += line_padding; diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h index ea6ae41c71..1186e2fe1d 100644 --- a/libavcodec/v210enc.h +++ b/libavcodec/v210enc.h @@ -24,8 +24,10 @@ #include "libavutil/pixfmt.h" typedef struct { - void (*pack_line_8)(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t 
width); - void (*pack_line_10)(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width); + void (*pack_line_8)(const uint8_t *y, const uint8_t *u, + const uint8_t *v, uint8_t *dst, ptrdiff_t width); + void (*pack_line_10)(const uint16_t *y, const uint16_t *u, + const uint16_t *v, uint8_t *dst, ptrdiff_t width); } V210EncContext; void ff_v210enc_init_x86(V210EncContext *s); diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c index 3ac498ac5f..2afb1b2d7b 100644 --- a/libavcodec/x86/v210enc_init.c +++ b/libavcodec/x86/v210enc_init.c @@ -19,19 +19,24 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/v210enc.h" -void ff_v210_planar_pack_8_ssse3(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width); -void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width); -void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width); +void ff_v210_planar_pack_8_ssse3(const uint8_t *y, const uint8_t *u, + const uint8_t *v, uint8_t *dst, + ptrdiff_t width); +void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u, + const uint8_t *v, uint8_t *dst, ptrdiff_t width); +void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u, + const uint16_t *v, uint8_t *dst, + ptrdiff_t width); av_cold void ff_v210enc_init_x86(V210EncContext *s) { int cpu_flags = av_get_cpu_flags(); - if( EXTERNAL_SSSE3(cpu_flags) ) { + if (EXTERNAL_SSSE3(cpu_flags)) { s->pack_line_8 = ff_v210_planar_pack_8_ssse3; s->pack_line_10 = ff_v210_planar_pack_10_ssse3; } - if( EXTERNAL_AVX(cpu_flags) ) + if (EXTERNAL_AVX(cpu_flags)) s->pack_line_8 = ff_v210_planar_pack_8_avx; } |