aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/hevcdsp.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2015-07-24 08:24:21 +0200
committerAnton Khirnov <anton@khirnov.net>2015-12-05 21:08:04 +0100
commit1f821750f0b8d0c87cbf88a28ad699b92db5ec88 (patch)
tree78ec782cd9a422df28fa05c2c0a72650f1ec89c1 /libavcodec/hevcdsp.c
parent6788baebb3680d447eabdadf3f5743c8470a4611 (diff)
downloadffmpeg-1f821750f0b8d0c87cbf88a28ad699b92db5ec88.tar.gz
hevcdsp: split the qpel functions by width instead of by the subpixel fraction
This should allow for more efficient SIMD. Keep the C versions as they are now, to allow the compiler to inline the interpolation coefficients.
Diffstat (limited to 'libavcodec/hevcdsp.c')
-rw-r--r--libavcodec/hevcdsp.c30
1 files changed, 14 insertions, 16 deletions
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 216101a083..86d9e85b92 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -116,6 +116,12 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
#undef FUNC
#define FUNC(a, depth) a ## _ ## depth
+#define QPEL_FUNC(i, width, depth) \
+ hevcdsp->put_hevc_qpel[0][0][i] = FUNC(put_hevc_qpel_pixels_ ## width, depth); \
+ hevcdsp->put_hevc_qpel[0][1][i] = FUNC(put_hevc_qpel_h_ ## width, depth); \
+ hevcdsp->put_hevc_qpel[1][0][i] = FUNC(put_hevc_qpel_v_ ## width, depth); \
+ hevcdsp->put_hevc_qpel[1][1][i] = FUNC(put_hevc_qpel_hv_ ## width, depth); \
+
#define HEVC_DSP(depth) \
hevcdsp->put_pcm = FUNC(put_pcm, depth); \
hevcdsp->transquant_bypass[0] = FUNC(transquant_bypass4x4, depth); \
@@ -139,22 +145,14 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth); \
hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth); \
\
- hevcdsp->put_hevc_qpel[0][0] = FUNC(put_hevc_qpel_pixels, depth); \
- hevcdsp->put_hevc_qpel[0][1] = FUNC(put_hevc_qpel_h1, depth); \
- hevcdsp->put_hevc_qpel[0][2] = FUNC(put_hevc_qpel_h2, depth); \
- hevcdsp->put_hevc_qpel[0][3] = FUNC(put_hevc_qpel_h3, depth); \
- hevcdsp->put_hevc_qpel[1][0] = FUNC(put_hevc_qpel_v1, depth); \
- hevcdsp->put_hevc_qpel[1][1] = FUNC(put_hevc_qpel_h1v1, depth); \
- hevcdsp->put_hevc_qpel[1][2] = FUNC(put_hevc_qpel_h2v1, depth); \
- hevcdsp->put_hevc_qpel[1][3] = FUNC(put_hevc_qpel_h3v1, depth); \
- hevcdsp->put_hevc_qpel[2][0] = FUNC(put_hevc_qpel_v2, depth); \
- hevcdsp->put_hevc_qpel[2][1] = FUNC(put_hevc_qpel_h1v2, depth); \
- hevcdsp->put_hevc_qpel[2][2] = FUNC(put_hevc_qpel_h2v2, depth); \
- hevcdsp->put_hevc_qpel[2][3] = FUNC(put_hevc_qpel_h3v2, depth); \
- hevcdsp->put_hevc_qpel[3][0] = FUNC(put_hevc_qpel_v3, depth); \
- hevcdsp->put_hevc_qpel[3][1] = FUNC(put_hevc_qpel_h1v3, depth); \
- hevcdsp->put_hevc_qpel[3][2] = FUNC(put_hevc_qpel_h2v3, depth); \
- hevcdsp->put_hevc_qpel[3][3] = FUNC(put_hevc_qpel_h3v3, depth); \
+ QPEL_FUNC(0, 4, depth); \
+ QPEL_FUNC(1, 8, depth); \
+ QPEL_FUNC(2, 12, depth); \
+ QPEL_FUNC(3, 16, depth); \
+ QPEL_FUNC(4, 24, depth); \
+ QPEL_FUNC(5, 32, depth); \
+ QPEL_FUNC(6, 48, depth); \
+ QPEL_FUNC(7, 64, depth); \
\
hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth); \
hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth); \