aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Diego Biurrun <diego@biurrun.de> 2013-12-21 16:03:59 +0100
committer Diego Biurrun <diego@biurrun.de> 2014-03-22 06:17:30 -0700
commit 57f09608e1600d1cf1679885a46f5004d522d68f (patch)
tree ddb3ce6e7d2ac7f8479add595d7212434d7ca8f7
parent 82dd1026cfc1d72b04019185bea4c1c9621ace3f (diff)
download ffmpeg-57f09608e1600d1cf1679885a46f5004d522d68f.tar.gz
dsputil: Move thirdpel-related bits into their own context
-rwxr-xr-x configure 3
-rw-r--r-- doc/optimization.txt 3
-rw-r--r-- libavcodec/Makefile 1
-rw-r--r-- libavcodec/dsputil.c 299
-rw-r--r-- libavcodec/dsputil.h 16
-rw-r--r-- libavcodec/h264qpel_template.c 1
-rw-r--r-- libavcodec/hpel_template.c 45
-rw-r--r-- libavcodec/hpeldsp_template.c 1
-rw-r--r-- libavcodec/svq3.c 18
-rw-r--r-- libavcodec/tpel_template.c 80
-rw-r--r-- libavcodec/tpeldsp.c 333
-rw-r--r-- libavcodec/tpeldsp.h 59
12 files changed, 489 insertions, 370 deletions
diff --git a/configure b/configure
index fced12f9bd..f26374a3d3 100755
--- a/configure
+++ b/configure
@@ -1537,6 +1537,7 @@ CONFIG_EXTRA="
rtpdec
rtpenc_chain
sinewin
+ tpeldsp
videodsp
vp3dsp
"
@@ -1820,7 +1821,7 @@ sipr_decoder_select="lsp"
sp5x_decoder_select="mjpeg_decoder"
svq1_decoder_select="hpeldsp"
svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc"
-svq3_decoder_select="h264_decoder hpeldsp"
+svq3_decoder_select="h264_decoder hpeldsp tpeldsp"
svq3_decoder_suggest="zlib"
tak_decoder_select="dsputil"
theora_decoder_select="vp3_decoder"
diff --git a/doc/optimization.txt b/doc/optimization.txt
index 42ad15ed28..b51183fa34 100644
--- a/doc/optimization.txt
+++ b/doc/optimization.txt
@@ -79,9 +79,6 @@ qpel{8,16}_mc??_old_c / *pixels{8,16}_l4
Just used to work around a bug in an old libavcodec encoder version.
Don't optimize them.
-tpel_mc_func {put,avg}_tpel_pixels_tab
- Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding.
-
add_bytes/diff_bytes
For huffyuv only, optimize if you want a faster ffhuffyuv codec.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index c04b3f1294..3d178a1387 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -65,6 +65,7 @@ OBJS-$(CONFIG_RANGECODER) += rangecoder.o
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes)
OBJS-$(CONFIG_SINEWIN) += sinewin.o
+OBJS-$(CONFIG_TPELDSP) += tpeldsp.o
OBJS-$(CONFIG_VAAPI) += vaapi.o
OBJS-$(CONFIG_VDPAU) += vdpau.o
OBJS-$(CONFIG_VIDEODSP) += videodsp.o
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 9fe6f0b757..b81ba47521 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -48,6 +48,7 @@ uint32_t ff_square_tab[512] = { 0, };
#undef BIT_DEPTH
#define BIT_DEPTH 8
+#include "tpel_template.c"
#include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
@@ -540,284 +541,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
}
}
-static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- switch (width) {
- case 2:
- put_pixels2_8_c(dst, src, stride, height);
- break;
- case 4:
- put_pixels4_8_c(dst, src, stride, height);
- break;
- case 8:
- put_pixels8_8_c(dst, src, stride, height);
- break;
- case 16:
- put_pixels16_8_c(dst, src, stride, height);
- break;
- }
-}
-
-static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((2 * src[j] + src[j + 1] + 1) *
- 683) >> 11;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((src[j] + 2 * src[j + 1] + 1) *
- 683) >> 11;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((2 * src[j] + src[j + stride] + 1) *
- 683) >> 11;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((4 * src[j] + 3 * src[j + 1] +
- 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
- 2731) >> 15;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((3 * src[j] + 2 * src[j + 1] +
- 4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
- 2731) >> 15;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((src[j] + 2 * src[j + stride] + 1) *
- 683) >> 11;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((3 * src[j] + 4 * src[j + 1] +
- 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
- 2731) >> 15;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = ((2 * src[j] + 3 * src[j + 1] +
- 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
- 2731) >> 15;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- switch (width) {
- case 2:
- avg_pixels2_8_c(dst, src, stride, height);
- break;
- case 4:
- avg_pixels4_8_c(dst, src, stride, height);
- break;
- case 8:
- avg_pixels8_8_c(dst, src, stride, height);
- break;
- case 16:
- avg_pixels16_8_c(dst, src, stride, height);
- break;
- }
-}
-
-static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((2 * src[j] + src[j + 1] + 1) *
- 683) >> 11) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((src[j] + 2 * src[j + 1] + 1) *
- 683) >> 11) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((2 * src[j] + src[j + stride] + 1) *
- 683) >> 11) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((4 * src[j] + 3 * src[j + 1] +
- 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
- 2731) >> 15) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((3 * src[j] + 2 * src[j + 1] +
- 4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
- 2731) >> 15) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((src[j] + 2 * src[j + stride] + 1) *
- 683) >> 11) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((3 * src[j] + 4 * src[j + 1] +
- 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
- 2731) >> 15) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
-static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
- int stride, int width, int height)
-{
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++)
- dst[j] = (dst[j] +
- (((2 * src[j] + 3 * src[j + 1] +
- 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
- 2731) >> 15) + 1) >> 1;
- src += stride;
- dst += stride;
- }
-}
-
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride, \
@@ -2781,26 +2504,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->pix_abs[1][2] = pix_abs8_y2_c;
c->pix_abs[1][3] = pix_abs8_xy2_c;
- c->put_tpel_pixels_tab[0] = put_tpel_pixels_mc00_c;
- c->put_tpel_pixels_tab[1] = put_tpel_pixels_mc10_c;
- c->put_tpel_pixels_tab[2] = put_tpel_pixels_mc20_c;
- c->put_tpel_pixels_tab[4] = put_tpel_pixels_mc01_c;
- c->put_tpel_pixels_tab[5] = put_tpel_pixels_mc11_c;
- c->put_tpel_pixels_tab[6] = put_tpel_pixels_mc21_c;
- c->put_tpel_pixels_tab[8] = put_tpel_pixels_mc02_c;
- c->put_tpel_pixels_tab[9] = put_tpel_pixels_mc12_c;
- c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
-
- c->avg_tpel_pixels_tab[0] = avg_tpel_pixels_mc00_c;
- c->avg_tpel_pixels_tab[1] = avg_tpel_pixels_mc10_c;
- c->avg_tpel_pixels_tab[2] = avg_tpel_pixels_mc20_c;
- c->avg_tpel_pixels_tab[4] = avg_tpel_pixels_mc01_c;
- c->avg_tpel_pixels_tab[5] = avg_tpel_pixels_mc11_c;
- c->avg_tpel_pixels_tab[6] = avg_tpel_pixels_mc21_c;
- c->avg_tpel_pixels_tab[8] = avg_tpel_pixels_mc02_c;
- c->avg_tpel_pixels_tab[9] = avg_tpel_pixels_mc12_c;
- c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
-
#define dspfunc(PFX, IDX, NUM) \
c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \
c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index d596e29099..052ac50694 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -71,9 +71,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
* Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
* h for op_pixels_func is limited to { width / 2, width },
* but never larger than 16 and never smaller than 4. */
-typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
- const uint8_t *pixels /* align 1 */,
- int line_size, int w, int h);
typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
uint8_t *src /* align 1 */, ptrdiff_t stride);
@@ -188,19 +185,6 @@ typedef struct DSPContext {
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
int size);
- /**
- * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
- * this is an array[12] of motion compensation functions for the
- * 9 thirdpel positions<br>
- * *pixels_tab[xthirdpel + 4 * ythirdpel]
- * @param block destination where the result is stored
- * @param pixels source
- * @param line_size number of bytes in a horizontal line of block
- * @param h height
- */
- tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
- tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
-
qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
diff --git a/libavcodec/h264qpel_template.c b/libavcodec/h264qpel_template.c
index 71821798a4..d03b0dc443 100644
--- a/libavcodec/h264qpel_template.c
+++ b/libavcodec/h264qpel_template.c
@@ -24,6 +24,7 @@
#include "bit_depth_template.c"
#include "hpel_template.c"
+#include "tpel_template.c"
static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
diff --git a/libavcodec/hpel_template.c b/libavcodec/hpel_template.c
index 1bc18ccad0..0a8550a738 100644
--- a/libavcodec/hpel_template.c
+++ b/libavcodec/hpel_template.c
@@ -22,47 +22,6 @@
#include "pixels.h"
#define DEF_HPEL(OPNAME, OP) \
-static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \
- const uint8_t *pixels, \
- ptrdiff_t line_size, \
- int h) \
-{ \
- int i; \
- for (i = 0; i < h; i++) { \
- OP(*((pixel2 *) block), AV_RN2P(pixels)); \
- pixels += line_size; \
- block += line_size; \
- } \
-} \
- \
-static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \
- const uint8_t *pixels, \
- ptrdiff_t line_size, \
- int h) \
-{ \
- int i; \
- for (i = 0; i < h; i++) { \
- OP(*((pixel4 *) block), AV_RN4P(pixels)); \
- pixels += line_size; \
- block += line_size; \
- } \
-} \
- \
-static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \
- const uint8_t *pixels, \
- ptrdiff_t line_size, \
- int h) \
-{ \
- int i; \
- for (i = 0; i < h; i++) { \
- OP(*((pixel4 *) block), AV_RN4P(pixels)); \
- OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \
- AV_RN4P(pixels + 4 * sizeof(pixel))); \
- pixels += line_size; \
- block += line_size; \
- } \
-} \
- \
static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, \
const uint8_t *src1, \
const uint8_t *src2, \
@@ -134,10 +93,6 @@ static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, \
dst_stride, src_stride1, \
src_stride2, h); \
} \
- \
-CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \
- FUNCC(OPNAME ## _pixels8), \
- 8 * sizeof(pixel))
#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
#define op_put(a, b) a = b
diff --git a/libavcodec/hpeldsp_template.c b/libavcodec/hpeldsp_template.c
index f190457b96..3039bfa9d6 100644
--- a/libavcodec/hpeldsp_template.c
+++ b/libavcodec/hpeldsp_template.c
@@ -33,6 +33,7 @@
#include "bit_depth_template.c"
#include "hpel_template.c"
+#include "tpel_template.c"
#define PIXOP2(OPNAME, OP) \
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, \
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 4916314c08..fc2120b2cb 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -54,6 +54,7 @@
#include "golomb.h"
#include "hpeldsp.h"
#include "rectangle.h"
+#include "tpeldsp.h"
#if CONFIG_ZLIB
#include <zlib.h>
@@ -70,6 +71,7 @@
typedef struct {
H264Context h;
HpelDSPContext hdsp;
+ TpelDSPContext tdsp;
H264Picture *cur_pic;
H264Picture *next_pic;
H264Picture *last_pic;
@@ -321,9 +323,9 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
src = h->edge_emu_buffer;
}
if (thirdpel)
- (avg ? h->dsp.avg_tpel_pixels_tab
- : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
- width, height);
+ (avg ? s->tdsp.avg_tpel_pixels_tab
+ : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
+ width, height);
else
(avg ? s->hdsp.avg_pixels_tab
: s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize,
@@ -349,10 +351,10 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
src = h->edge_emu_buffer;
}
if (thirdpel)
- (avg ? h->dsp.avg_tpel_pixels_tab
- : h->dsp.put_tpel_pixels_tab)[dxy](dest, src,
- h->uvlinesize,
- width, height);
+ (avg ? s->tdsp.avg_tpel_pixels_tab
+ : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
+ h->uvlinesize,
+ width, height);
else
(avg ? s->hdsp.avg_pixels_tab
: s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
@@ -881,6 +883,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
return -1;
ff_hpeldsp_init(&s->hdsp, avctx->flags);
+ ff_tpeldsp_init(&s->tdsp);
+
h->flags = avctx->flags;
h->is_complex = 1;
h->picture_structure = PICT_FRAME;
diff --git a/libavcodec/tpel_template.c b/libavcodec/tpel_template.c
new file mode 100644
index 0000000000..f07679a88f
--- /dev/null
+++ b/libavcodec/tpel_template.c
@@ -0,0 +1,80 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/intreadwrite.h"
+#include "pixels.h"
+#include "rnd_avg.h"
+
+#include "bit_depth_template.c"
+
+#define DEF_TPEL(OPNAME, OP) \
+static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \
+ const uint8_t *pixels, \
+ ptrdiff_t line_size, \
+ int h) \
+{ \
+ int i; \
+ for (i = 0; i < h; i++) { \
+ OP(*((pixel2 *) block), AV_RN2P(pixels)); \
+ pixels += line_size; \
+ block += line_size; \
+ } \
+} \
+ \
+static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \
+ const uint8_t *pixels, \
+ ptrdiff_t line_size, \
+ int h) \
+{ \
+ int i; \
+ for (i = 0; i < h; i++) { \
+ OP(*((pixel4 *) block), AV_RN4P(pixels)); \
+ pixels += line_size; \
+ block += line_size; \
+ } \
+} \
+ \
+static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \
+ const uint8_t *pixels, \
+ ptrdiff_t line_size, \
+ int h) \
+{ \
+ int i; \
+ for (i = 0; i < h; i++) { \
+ OP(*((pixel4 *) block), AV_RN4P(pixels)); \
+ OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \
+ AV_RN4P(pixels + 4 * sizeof(pixel))); \
+ pixels += line_size; \
+ block += line_size; \
+ } \
+} \
+ \
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \
+ FUNCC(OPNAME ## _pixels8), \
+ 8 * sizeof(pixel))
+
+#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
+#define op_put(a, b) a = b
+
+DEF_TPEL(avg, op_avg)
+DEF_TPEL(put, op_put)
+#undef op_avg
+#undef op_put
diff --git a/libavcodec/tpeldsp.c b/libavcodec/tpeldsp.c
new file mode 100644
index 0000000000..6a1681311a
--- /dev/null
+++ b/libavcodec/tpeldsp.c
@@ -0,0 +1,333 @@
+/*
+ * thirdpel DSP functions
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * thirdpel DSP functions
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "tpeldsp.h"
+
+#define BIT_DEPTH 8
+#include "tpel_template.c"
+
+static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ switch (width) {
+ case 2:
+ put_pixels2_8_c(dst, src, stride, height);
+ break;
+ case 4:
+ put_pixels4_8_c(dst, src, stride, height);
+ break;
+ case 8:
+ put_pixels8_8_c(dst, src, stride, height);
+ break;
+ case 16:
+ put_pixels16_8_c(dst, src, stride, height);
+ break;
+ }
+}
+
+static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((2 * src[j] + src[j + 1] + 1) *
+ 683) >> 11;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((src[j] + 2 * src[j + 1] + 1) *
+ 683) >> 11;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((2 * src[j] + src[j + stride] + 1) *
+ 683) >> 11;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((4 * src[j] + 3 * src[j + 1] +
+ 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
+ 2731) >> 15;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((3 * src[j] + 2 * src[j + 1] +
+ 4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
+ 2731) >> 15;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((src[j] + 2 * src[j + stride] + 1) *
+ 683) >> 11;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((3 * src[j] + 4 * src[j + 1] +
+ 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
+ 2731) >> 15;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = ((2 * src[j] + 3 * src[j + 1] +
+ 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
+ 2731) >> 15;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ switch (width) {
+ case 2:
+ avg_pixels2_8_c(dst, src, stride, height);
+ break;
+ case 4:
+ avg_pixels4_8_c(dst, src, stride, height);
+ break;
+ case 8:
+ avg_pixels8_8_c(dst, src, stride, height);
+ break;
+ case 16:
+ avg_pixels16_8_c(dst, src, stride, height);
+ break;
+ }
+}
+
+static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((2 * src[j] + src[j + 1] + 1) *
+ 683) >> 11) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((src[j] + 2 * src[j + 1] + 1) *
+ 683) >> 11) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((2 * src[j] + src[j + stride] + 1) *
+ 683) >> 11) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((4 * src[j] + 3 * src[j + 1] +
+ 3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
+ 2731) >> 15) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((3 * src[j] + 2 * src[j + 1] +
+ 4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
+ 2731) >> 15) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((src[j] + 2 * src[j + stride] + 1) *
+ 683) >> 11) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((3 * src[j] + 4 * src[j + 1] +
+ 2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
+ 2731) >> 15) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
+ int stride, int width, int height)
+{
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++)
+ dst[j] = (dst[j] +
+ (((2 * src[j] + 3 * src[j + 1] +
+ 3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
+ 2731) >> 15) + 1) >> 1;
+ src += stride;
+ dst += stride;
+ }
+}
+
+av_cold void ff_tpeldsp_init(TpelDSPContext *c)
+{
+ c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
+ c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
+ c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
+ c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
+ c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
+ c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
+ c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
+ c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
+ c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
+
+ c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
+ c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
+ c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
+ c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
+ c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
+ c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
+ c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
+ c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
+ c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
+}
diff --git a/libavcodec/tpeldsp.h b/libavcodec/tpeldsp.h
new file mode 100644
index 0000000000..9c67d60850
--- /dev/null
+++ b/libavcodec/tpeldsp.h
@@ -0,0 +1,59 @@
+/*
+ * thirdpel DSP functions
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * thirdpel DSP functions
+ */
+
+#ifndef AVCODEC_TPELDSP_H
+#define AVCODEC_TPELDSP_H
+
+#include <stdint.h>
+
+/* avg and put pixel (decoding) */
+// tpel_mc_func receives the block width w and height h explicitly.
+// The full-pel (mc00) C implementations only handle w = 2, 4, 8 or 16;
+// any other width leaves the destination untouched.
+typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
+ const uint8_t *pixels /* align 1 */,
+ int line_size, int w, int h);
+
+/**
+ * thirdpel DSP context
+ */
+typedef struct TpelDSPContext {
+ /**
+ * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
+ * This is an array[11] of functions for the 9 thirdpel positions<br>
+ * *pixels_tab[xthirdpel + 4 * ythirdpel]; ff_tpeldsp_init() skips 3 and 7
+ * @param block destination where the result is stored
+ * @param pixels source
+ * @param line_size number of bytes in a horizontal line of block
+ * @param w width
+ * @param h height
+ */
+ tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
+ tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
+} TpelDSPContext;
+
+void ff_tpeldsp_init(TpelDSPContext *c);
+
+#endif /* AVCODEC_TPELDSP_H */