diff options
author | Anton Khirnov <anton@khirnov.net> | 2015-07-24 18:56:54 +0200 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2015-12-05 21:10:41 +0100 |
commit | 688417399c69aadd4c287bdb0dec82ef8799011c (patch) | |
tree | 872ef2840ecd6007bad27df007b0f97ee19c9ae0 /libavcodec/hevcdsp_template.c | |
parent | 818bfe7f0a3ff243deb63c4b146de2563f38ffd4 (diff) | |
download | ffmpeg-688417399c69aadd4c287bdb0dec82ef8799011c.tar.gz |
hevcdsp: split the pred functions by width
This should allow for more efficient SIMD.
Diffstat (limited to 'libavcodec/hevcdsp_template.c')
-rw-r--r-- | libavcodec/hevcdsp_template.c | 81 |
1 file changed, 63 insertions, 18 deletions
```diff
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index d832904dcb..723f4d4520 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -1130,9 +1130,10 @@ EPEL(6)
 EPEL(4)
 EPEL(2)
 
-static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
-                                      int16_t *src, ptrdiff_t srcstride,
-                                      int width, int height)
+static av_always_inline void
+FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
+                          int16_t *src, ptrdiff_t srcstride,
+                          int width, int height)
 {
     int x, y;
     pixel *dst = (pixel *)_dst;
@@ -1152,10 +1153,11 @@ static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 }
 
-static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
-                                          int16_t *src1, int16_t *src2,
-                                          ptrdiff_t srcstride,
-                                          int width, int height)
+static av_always_inline void
+FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
+                              int16_t *src1, int16_t *src2,
+                              ptrdiff_t srcstride,
+                              int width, int height)
 {
     int x, y;
     pixel *dst = (pixel *)_dst;
@@ -1177,10 +1179,11 @@ static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 }
 
-static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
-                                uint8_t *_dst, ptrdiff_t _dststride,
-                                int16_t *src, ptrdiff_t srcstride,
-                                int width, int height)
+static av_always_inline void
+FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
+                    uint8_t *_dst, ptrdiff_t _dststride,
+                    int16_t *src, ptrdiff_t srcstride,
+                    int width, int height)
 {
     int shift, log2Wd, wx, ox, x, y, offset;
     pixel *dst = (pixel *)_dst;
@@ -1205,13 +1208,14 @@ static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
     }
 }
 
-static void FUNC(weighted_pred_avg)(uint8_t denom,
-                                    int16_t wl0Flag, int16_t wl1Flag,
-                                    int16_t ol0Flag, int16_t ol1Flag,
-                                    uint8_t *_dst, ptrdiff_t _dststride,
-                                    int16_t *src1, int16_t *src2,
-                                    ptrdiff_t srcstride,
-                                    int width, int height)
+static av_always_inline void
+FUNC(weighted_pred_avg)(uint8_t denom,
+                        int16_t wl0Flag, int16_t wl1Flag,
+                        int16_t ol0Flag, int16_t ol1Flag,
+                        uint8_t *_dst, ptrdiff_t _dststride,
+                        int16_t *src1, int16_t *src2,
+                        ptrdiff_t srcstride,
+                        int width, int height)
 {
     int shift, log2Wd, w0, w1, o0, o1, x, y;
     pixel *dst = (pixel *)_dst;
@@ -1234,6 +1238,47 @@ static void FUNC(weighted_pred_avg)(uint8_t denom,
     }
 }
 
+#define PUT_PRED(w) \
+static void FUNC(put_unweighted_pred_ ## w)(uint8_t *dst, ptrdiff_t dststride, \
+                                            int16_t *src, ptrdiff_t srcstride, \
+                                            int height) \
+{ \
+    FUNC(put_unweighted_pred)(dst, dststride, src, srcstride, w, height); \
+} \
+static void FUNC(put_unweighted_pred_avg_ ## w)(uint8_t *dst, ptrdiff_t dststride, \
+                                                int16_t *src1, int16_t *src2, \
+                                                ptrdiff_t srcstride, int height) \
+{ \
+    FUNC(put_unweighted_pred_avg)(dst, dststride, src1, src2, srcstride, w, height); \
+} \
+static void FUNC(put_weighted_pred_ ## w)(uint8_t denom, int16_t weight, int16_t offset, \
+                                          uint8_t *dst, ptrdiff_t dststride, \
+                                          int16_t *src, ptrdiff_t srcstride, int height) \
+{ \
+    FUNC(weighted_pred)(denom, weight, offset, \
+                        dst, dststride, src, srcstride, w, height); \
+} \
+static void FUNC(put_weighted_pred_avg_ ## w)(uint8_t denom, int16_t weight0, int16_t weight1, \
+                                              int16_t offset0, int16_t offset1, \
+                                              uint8_t *dst, ptrdiff_t dststride, \
+                                              int16_t *src1, int16_t *src2, \
+                                              ptrdiff_t srcstride, int height) \
+{ \
+    FUNC(weighted_pred_avg)(denom, weight0, weight1, offset0, offset1, \
+                            dst, dststride, src1, src2, srcstride, w, height); \
+}
+
+PUT_PRED(64)
+PUT_PRED(48)
+PUT_PRED(32)
+PUT_PRED(24)
+PUT_PRED(16)
+PUT_PRED(12)
+PUT_PRED(8)
+PUT_PRED(6)
+PUT_PRED(4)
+PUT_PRED(2)
+
 // line zero
 #define P3 pix[-4 * xstride]
 #define P2 pix[-3 * xstride]
```