aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2014-01-20 20:32:51 +0100
committerMichael Niedermayer <michaelni@gmx.at>2014-01-20 21:32:47 +0100
commitf70d7eb20c1d5a920f80ef73d1bb6199feedcec4 (patch)
treec98da78d0a8d1ac70d36cdd7472cf84a538b00fd
parent78e39aa7ee12bb61cf34d8ca6bebd129d659d9cd (diff)
downloadffmpeg-f70d7eb20c1d5a920f80ef73d1bb6199feedcec4.tar.gz
Move add/diff_int16 to lossless_videodsp
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rwxr-xr-xconfigure9
-rw-r--r--libavcodec/Makefile1
-rw-r--r--libavcodec/dsputil.c41
-rw-r--r--libavcodec/dsputil.h2
-rw-r--r--libavcodec/huffyuv.c1
-rw-r--r--libavcodec/huffyuv.h2
-rw-r--r--libavcodec/huffyuvdec.c2
-rw-r--r--libavcodec/huffyuvenc.c2
-rw-r--r--libavcodec/lossless_videodsp.c69
-rw-r--r--libavcodec/lossless_videodsp.h36
-rw-r--r--libavcodec/x86/Makefile2
-rw-r--r--libavcodec/x86/dsputil.asm64
-rw-r--r--libavcodec/x86/dsputil_init.c3
-rw-r--r--libavcodec/x86/dsputil_x86.h2
-rw-r--r--libavcodec/x86/lossless_videodsp.asm88
-rw-r--r--libavcodec/x86/lossless_videodsp_init.c38
16 files changed, 244 insertions, 118 deletions
diff --git a/configure b/configure
index 1a357e7b23..7b4cfc5f5e 100755
--- a/configure
+++ b/configure
@@ -1683,6 +1683,7 @@ CONFIG_EXTRA="
huffman
intrax8
lgplv3
+ llviddsp
lpc
mpegaudio
mpegaudiodsp
@@ -1904,8 +1905,8 @@ eatqi_decoder_select="aandcttables error_resilience mpegvideo"
exr_decoder_select="zlib"
ffv1_decoder_select="dsputil golomb rangecoder"
ffv1_encoder_select="dsputil rangecoder"
-ffvhuff_decoder_select="dsputil"
-ffvhuff_encoder_select="dsputil huffman"
+ffvhuff_decoder_select="dsputil llviddsp"
+ffvhuff_encoder_select="dsputil huffman llviddsp"
flac_decoder_select="golomb"
flac_encoder_select="dsputil golomb lpc"
flashsv_decoder_select="zlib"
@@ -1927,8 +1928,8 @@ h263p_encoder_select="h263_encoder"
h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel videodsp"
h264_decoder_suggest="error_resilience"
hevc_decoder_select="dsputil golomb videodsp"
-huffyuv_decoder_select="dsputil"
-huffyuv_encoder_select="dsputil huffman"
+huffyuv_decoder_select="dsputil llviddsp"
+huffyuv_encoder_select="dsputil huffman llviddsp"
iac_decoder_select="dsputil fft mdct sinewin"
imc_decoder_select="dsputil fft mdct sinewin"
indeo3_decoder_select="hpeldsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 042acd7302..a2b4b8e981 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -57,6 +57,7 @@ OBJS-$(CONFIG_HPELDSP) += hpeldsp.o
OBJS-$(CONFIG_HUFFMAN) += huffman.o
OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o
OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o
+OBJS-$(CONFIG_LLVIDDSP) += lossless_videodsp.o
OBJS-$(CONFIG_LPC) += lpc.o
OBJS-$(CONFIG_LSP) += lsp.o
OBJS-$(CONFIG_MDCT) += mdct_fixed.o mdct_float.o mdct_fixed_32.o
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 8533e5382c..0e9e34798e 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -1888,45 +1888,6 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
dst[i+0] = src1[i+0]-src2[i+0];
}
-static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){
- long i;
- unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
- unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL;
- for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
- long a = *(long*)(src+i);
- long b = *(long*)(dst+i);
- *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb);
- }
- for(; i<w; i++)
- dst[i] = (dst[i] + src[i]) & mask;
-}
-
-static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
- long i;
-#if !HAVE_FAST_UNALIGNED
- if((long)src2 & (sizeof(long)-1)){
- for(i=0; i+7<w; i+=8){
- dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
- dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
- dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
- dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
- }
- }else
-#endif
- {
- unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
- unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL;
-
- for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
- long a = *(long*)(src1+i);
- long b = *(long*)(src2+i);
- *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
- }
- }
- for (; i<w; i++)
- dst[i] = (src1[i] - src2[i]) & mask;
-}
-
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
int i;
uint8_t l, lt;
@@ -2812,8 +2773,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->add_bytes= add_bytes_c;
c->diff_bytes= diff_bytes_c;
- c->add_int16 = add_int16_c;
- c->diff_int16= diff_int16_c;
c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 7ad96f6cd7..0897c560b4 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -193,8 +193,6 @@ typedef struct DSPContext {
/* huffyuv specific */
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, const uint8_t *src1/*align 16*/, const uint8_t *src2/*align 1*/,int w);
- void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w);
- void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w);
/**
* subtract huffyuv's variant of median prediction
* note, this might read from src1[-1], src2[-1]
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index c183bdf075..8b0dcdcc6e 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -81,6 +81,7 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx)
s->flags = avctx->flags;
ff_dsputil_init(&s->dsp, avctx);
+ ff_llviddsp_init(&s->llviddsp);
s->width = avctx->width;
s->height = avctx->height;
diff --git a/libavcodec/huffyuv.h b/libavcodec/huffyuv.h
index b529f1b52b..0aecad0437 100644
--- a/libavcodec/huffyuv.h
+++ b/libavcodec/huffyuv.h
@@ -35,6 +35,7 @@
#include "dsputil.h"
#include "get_bits.h"
#include "put_bits.h"
+#include "lossless_videodsp.h"
#define VLC_BITS 11
@@ -92,6 +93,7 @@ typedef struct HYuvContext {
uint8_t *bitstream_buffer;
unsigned int bitstream_buffer_size;
DSPContext dsp;
+ LLVidDSPContext llviddsp;
} HYuvContext;
void ff_huffyuv_common_init(AVCodecContext *s);
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index aafa9e4abb..26c83098c4 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -708,7 +708,7 @@ static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w)
if (s->bps <= 8) {
s->dsp.add_bytes(dst, src, w);
} else {
- s->dsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w);
+ s->llviddsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w);
}
}
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index 2bc95b59e8..206f182850 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -41,7 +41,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
if (s->bps <= 8) {
s->dsp.diff_bytes(dst, src0, src1, w);
} else {
- s->dsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
+ s->llviddsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
}
}
diff --git a/libavcodec/lossless_videodsp.c b/libavcodec/lossless_videodsp.c
new file mode 100644
index 0000000000..7654f3ef67
--- /dev/null
+++ b/libavcodec/lossless_videodsp.c
@@ -0,0 +1,69 @@
+/*
+ * Lossless video DSP utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "lossless_videodsp.h"
+
+static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){ /* In place: dst[i] = (dst[i] + src[i]) & mask for 0 <= i < w. */
+ long i;
+ unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; /* (mask >> 1) copied into each 16-bit lane of a long (assumes mask fits in 16 bits) */
+ unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL; /* (mask >> 1) + 1 per lane: the top bit of mask when mask is 2^k - 1 -- NOTE(review): confirm callers only pass such masks */
+ for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { /* sizeof(long)/2 elements per pass, accessed through long-sized loads/stores */
+ long a = *(long*)(src+i);
+ long b = *(long*)(dst+i);
+ *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb); /* add the bits below the top bit per lane, then XOR in the sum of the two top bits */
+ }
+ for(; i<w; i++) /* scalar tail for the elements the long-sized loop did not reach */
+ dst[i] = (dst[i] + src[i]) & mask;
+}
+
+static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ /* dst[i] = (src1[i] - src2[i]) & mask for 0 <= i < w */
+    long i;
+#if !HAVE_FAST_UNALIGNED
+    if((long)src2 & (sizeof(long)-1)){ /* src2 is not long-aligned: stay on 16-bit accesses, unrolled by 4 */
+        for(i=0; i+3<w; i+=4){ /* stride equals the 4 elements written per pass, so no element is skipped */
+            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
+            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
+            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
+            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
+        }
+    }else
+#endif
+    {
+        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; /* (mask >> 1) in each 16-bit lane of a long */
+        unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL; /* (mask >> 1) + 1 per lane: the top bit of mask when mask is 2^k - 1 */
+
+        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { /* sizeof(long)/2 elements per pass */
+            long a = *(long*)(src1+i);
+            long b = *(long*)(src2+i);
+            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb); /* forcing the top bit in a keeps the borrow inside the lane; the XOR then restores the true top bit. NOTE(review): bits of src1 above mask are not cleared on this path */
+        }
+    }
+    for (; i<w; i++) /* scalar tail for whatever the loops above left over */
+        dst[i] = (src1[i] - src2[i]) & mask;
+}
+
+void ff_llviddsp_init(LLVidDSPContext *c) /* Fill c with the C implementations, then hand it to the x86 init on x86 builds. */
+{
+ c->add_int16 = add_int16_c;
+ c->diff_int16= diff_int16_c;
+
+ if (ARCH_X86) /* ARCH_X86 is defined outside this file; presumably a build-configuration constant */
+ ff_llviddsp_init_x86(c);
+}
diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h
new file mode 100644
index 0000000000..79dcc7443a
--- /dev/null
+++ b/libavcodec/lossless_videodsp.h
@@ -0,0 +1,36 @@
+/*
+ * Lossless video DSP utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVCODEC_LOSSLESS_VIDEODSP_H
+#define AVCODEC_LOSSLESS_VIDEODSP_H
+
+#include "avcodec.h"
+#include "libavutil/cpu.h"
+
+typedef struct LLVidDSPContext { /* Function-pointer table for the 16-bit add/diff routines; filled in by ff_llviddsp_init(). */
+ void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w); /* C version: dst[i] = (dst[i] + src[i]) & mask over w elements */
+ void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w); /* C version: dst[i] = (src1[i] - src2[i]) & mask over w elements */
+} LLVidDSPContext;
+
+void ff_llviddsp_init(LLVidDSPContext *llviddsp);
+void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp);
+
+#endif //AVCODEC_LOSSLESS_VIDEODSP_H
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index c7cdc2fcf5..dddaae1f72 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -18,6 +18,7 @@ OBJS-$(CONFIG_H264DSP) += x86/h264dsp_init.o
OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o
OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o
+OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp_init.o
OBJS-$(CONFIG_LPC) += x86/lpc.o
OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
@@ -86,6 +87,7 @@ YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \
x86/qpel.o
YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
x86/hpeldsp.o
+YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 9450cd8fd6..7162034c67 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -466,70 +466,6 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
ADD_HFYU_LEFT_LOOP 0, 0
-%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
- movd m4, maskq
- punpcklwd m4, m4
- punpcklwd m4, m4
- punpcklwd m4, m4
- add wq, wq
- test wq, 2*mmsize - 1
- jz %%.tomainloop
-%%.wordloop:
- sub wq, 2
- mov ax, [srcq+wq]
- add ax, [dstq+wq]
- and ax, maskw
- mov [dstq+wq], ax
- test wq, 2*mmsize - 1
- jnz %%.wordloop
-%%.tomainloop:
- add srcq, wq
- add dstq, wq
- neg wq
- jz %%.end
-%%.loop:
-%if %1
- mova m0, [srcq+wq]
- mova m1, [dstq+wq]
- mova m2, [srcq+wq+mmsize]
- mova m3, [dstq+wq+mmsize]
-%else
- movu m0, [srcq+wq]
- movu m1, [dstq+wq]
- movu m2, [srcq+wq+mmsize]
- movu m3, [dstq+wq+mmsize]
-%endif
- paddw m0, m1
- paddw m2, m3
- pand m0, m4
- pand m2, m4
-%if %1
- mova [dstq+wq] , m0
- mova [dstq+wq+mmsize], m2
-%else
- movu [dstq+wq] , m0
- movu [dstq+wq+mmsize], m2
-%endif
- add wq, 2*mmsize
- jl %%.loop
-%%.end:
- RET
-%endmacro
-
-INIT_MMX mmx
-cglobal add_int16, 4,4,5, dst, src, mask, w
- ADD_INT16_LOOP 1
-
-INIT_XMM sse2
-cglobal add_int16, 4,4,5, dst, src, mask, w
- test srcq, mmsize-1
- jnz .unaligned
- test dstq, mmsize-1
- jnz .unaligned
- ADD_INT16_LOOP 1
-.unaligned:
- ADD_INT16_LOOP 0
-
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
; int32_t max, unsigned int len)
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index 08bd29720a..e0b40410a7 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -542,7 +542,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_MMX_INLINE */
#if HAVE_MMX_EXTERNAL
- c->add_int16 = ff_add_int16_mmx;
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
#endif /* HAVE_MMX_EXTERNAL */
}
@@ -626,8 +625,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
}
c->bswap_buf = ff_bswap32_buf_sse2;
-
- c->add_int16 = ff_add_int16_sse2;
#endif /* HAVE_SSE2_EXTERNAL */
}
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h
index e707e55a59..356b2c142f 100644
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -116,8 +116,6 @@ void ff_clear_blocks_mmx(int16_t *blocks);
void ff_clear_blocks_sse(int16_t *blocks);
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
-void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
-void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
new file mode 100644
index 0000000000..d33299ca8c
--- /dev/null
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -0,0 +1,88 @@
+;******************************************************************************
+;* SIMD lossless video DSP utils
+;* Copyright (c) 2014 Michael Niedermayer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+%macro ADD_INT16_LOOP 1 ; %1 = is_aligned; dst[i] = (dst[i] + src[i]) & mask for w 16-bit elements
+    movd    m4, maskq
+    punpcklwd m4, m4
+    punpcklwd m4, m4
+    punpcklwd m4, m4 ; low word of mask now fills every word of m4
+    add     wd, wd ; element count -> byte count; 32-bit op because w is an int and the upper half of wq is not guaranteed clean
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+%%.wordloop: ; peel 16-bit elements off the end until the byte count is a multiple of 2*mmsize
+    sub     wq, 2
+    mov     tmpw, [srcq+wq] ; dedicated scratch register: ax would alias r0 (dst) on x86_32
+    add     tmpw, [dstq+wq]
+    and     tmpw, maskw
+    mov     [dstq+wq], tmpw
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+%%.tomainloop:
+    add     srcq, wq ; point past the vector part and count a negative offset up to zero
+    add     dstq, wq
+    neg     wq
+    jz %%.end
+%%.loop:
+%if %1
+    mova    m0, [srcq+wq]
+    mova    m1, [dstq+wq]
+    mova    m2, [srcq+wq+mmsize]
+    mova    m3, [dstq+wq+mmsize]
+%else
+    movu    m0, [srcq+wq]
+    movu    m1, [dstq+wq]
+    movu    m2, [srcq+wq+mmsize]
+    movu    m3, [dstq+wq+mmsize]
+%endif
+    paddw   m0, m1
+    paddw   m2, m3
+    pand    m0, m4
+    pand    m2, m4
+%if %1
+    mova    [dstq+wq]       , m0
+    mova    [dstq+wq+mmsize], m2
+%else
+    movu    [dstq+wq]       , m0
+    movu    [dstq+wq+mmsize], m2
+%endif
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro
+
+INIT_MMX mmx
+cglobal add_int16, 4,5,5, dst, src, mask, w, tmp ; 5th GPR is scratch for the word loop
+    ADD_INT16_LOOP 1
+
+INIT_XMM sse2
+cglobal add_int16, 4,5,5, dst, src, mask, w, tmp ; 5th GPR is scratch for the word loop
+    test srcq, mmsize-1
+    jnz .unaligned
+    test dstq, mmsize-1
+    jnz .unaligned
+    ADD_INT16_LOOP 1 ; both pointers mmsize-aligned; the macro ends in RET, so there is no fall-through
+.unaligned:
+    ADD_INT16_LOOP 0
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
new file mode 100644
index 0000000000..bc51d51cc1
--- /dev/null
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -0,0 +1,38 @@
+/*
+ * Lossless video DSP utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../lossless_videodsp.h"
+#include "libavutil/x86/cpu.h"
+
+void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+
+void ff_llviddsp_init_x86(LLVidDSPContext *c) /* Point c->add_int16 at the assembly version selected by the runtime CPU flags. */
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_MMX(cpu_flags)) {
+ c->add_int16 = ff_add_int16_mmx;
+ }
+
+ if (EXTERNAL_SSE2(cpu_flags)) { /* tested after MMX, so the SSE2 version wins when both checks pass */
+ c->add_int16 = ff_add_int16_sse2;
+ }
+} /* c->diff_int16 is not modified by this function */