diff options
author | Anton Khirnov <anton@khirnov.net> | 2015-07-24 08:24:21 +0200 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2015-12-05 21:08:04 +0100 |
commit | 1f821750f0b8d0c87cbf88a28ad699b92db5ec88 (patch) | |
tree | 78ec782cd9a422df28fa05c2c0a72650f1ec89c1 /libavcodec/hevcdsp.c | |
parent | 6788baebb3680d447eabdadf3f5743c8470a4611 (diff) | |
download | ffmpeg-1f821750f0b8d0c87cbf88a28ad699b92db5ec88.tar.gz |
hevcdsp: split the qpel functions by width instead of by the subpixel fraction
This should allow for more efficient SIMD.
Keep the C versions as they are now, to allow the compiler to inline the
interpolation coefficients.
Diffstat (limited to 'libavcodec/hevcdsp.c')
-rw-r--r-- | libavcodec/hevcdsp.c | 30 |
1 files changed, 14 insertions, 16 deletions
```diff
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 216101a083..86d9e85b92 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -116,6 +116,12 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
 #undef FUNC
 #define FUNC(a, depth) a ## _ ## depth
 
+#define QPEL_FUNC(i, width, depth) \
+    hevcdsp->put_hevc_qpel[0][0][i] = FUNC(put_hevc_qpel_pixels_ ## width, depth); \
+    hevcdsp->put_hevc_qpel[0][1][i] = FUNC(put_hevc_qpel_h_ ## width, depth); \
+    hevcdsp->put_hevc_qpel[1][0][i] = FUNC(put_hevc_qpel_v_ ## width, depth); \
+    hevcdsp->put_hevc_qpel[1][1][i] = FUNC(put_hevc_qpel_hv_ ## width, depth); \
+
 #define HEVC_DSP(depth) \
     hevcdsp->put_pcm = FUNC(put_pcm, depth); \
     hevcdsp->transquant_bypass[0] = FUNC(transquant_bypass4x4, depth); \
@@ -139,22 +145,14 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth); \
     hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth); \
     \
-    hevcdsp->put_hevc_qpel[0][0] = FUNC(put_hevc_qpel_pixels, depth); \
-    hevcdsp->put_hevc_qpel[0][1] = FUNC(put_hevc_qpel_h1, depth); \
-    hevcdsp->put_hevc_qpel[0][2] = FUNC(put_hevc_qpel_h2, depth); \
-    hevcdsp->put_hevc_qpel[0][3] = FUNC(put_hevc_qpel_h3, depth); \
-    hevcdsp->put_hevc_qpel[1][0] = FUNC(put_hevc_qpel_v1, depth); \
-    hevcdsp->put_hevc_qpel[1][1] = FUNC(put_hevc_qpel_h1v1, depth); \
-    hevcdsp->put_hevc_qpel[1][2] = FUNC(put_hevc_qpel_h2v1, depth); \
-    hevcdsp->put_hevc_qpel[1][3] = FUNC(put_hevc_qpel_h3v1, depth); \
-    hevcdsp->put_hevc_qpel[2][0] = FUNC(put_hevc_qpel_v2, depth); \
-    hevcdsp->put_hevc_qpel[2][1] = FUNC(put_hevc_qpel_h1v2, depth); \
-    hevcdsp->put_hevc_qpel[2][2] = FUNC(put_hevc_qpel_h2v2, depth); \
-    hevcdsp->put_hevc_qpel[2][3] = FUNC(put_hevc_qpel_h3v2, depth); \
-    hevcdsp->put_hevc_qpel[3][0] = FUNC(put_hevc_qpel_v3, depth); \
-    hevcdsp->put_hevc_qpel[3][1] = FUNC(put_hevc_qpel_h1v3, depth); \
-    hevcdsp->put_hevc_qpel[3][2] = FUNC(put_hevc_qpel_h2v3, depth); \
-    hevcdsp->put_hevc_qpel[3][3] = FUNC(put_hevc_qpel_h3v3, depth); \
+    QPEL_FUNC(0, 4, depth); \
+    QPEL_FUNC(1, 8, depth); \
+    QPEL_FUNC(2, 12, depth); \
+    QPEL_FUNC(3, 16, depth); \
+    QPEL_FUNC(4, 24, depth); \
+    QPEL_FUNC(5, 32, depth); \
+    QPEL_FUNC(6, 48, depth); \
+    QPEL_FUNC(7, 64, depth); \
     \
     hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth); \
     hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth); \
```