diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-03-24 02:12:17 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-03-24 02:16:11 +0100 |
commit | 2fd41c9067fc67b40f80e9cbd4787018009040db (patch) | |
tree | 378cc399057a6089f3f06bc62f0eff97d3ada56b /libavcodec | |
parent | 00dc0206cb5b351a66d7cce77b8a65fabe6ea7da (diff) | |
parent | 4ec153bb66a95da46c98e269bd0aa787e6172ed3 (diff) | |
download | ffmpeg-2fd41c9067fc67b40f80e9cbd4787018009040db.tar.gz |
Merge remote-tracking branch 'newdev/master'
* newdev/master:
avio: make udp_set_remote_url/get_local_port internal.
asfdec: also subtract preroll when reading simple index object
matroskaenc: remove a variable that's unused after bc17bd9.
avio: cosmetics - nicer vertical alignment.
Remove unnecessary icc version checks
Disable 'attribute "foo" ignored' warnings from icc
rtsp: Don't use a locale dependent format string
Add xd55 codec tag for XDCAM HD422 720p25 CBR files.
configure: get libavcodec version from new version.h header
lavc: move the version macros to a new installed header.
matroskaenc: simplify get_aac_sample_rates by using ff_mpeg4audio_get_config
Do not use format string "%0.3f" for RTSP Range field.
Add apply_window_int16() to DSPContext with x86-optimized versions and use it in the ac3_fixed encoder.
Document usage of import libraries created by dlltool
configure: Set the correct lib target for arm/wince dlltool
fate: simplify regression-funcs.sh
fate: add support for multithread testing
Conflicts:
libavformat/rtspdec.c
libavutil/attributes.h
libavutil/internal.h
libavutil/mem.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/ac3enc.c | 2 | ||||
-rw-r--r-- | libavcodec/ac3enc_fixed.c | 10 | ||||
-rw-r--r-- | libavcodec/ac3enc_float.c | 4 | ||||
-rw-r--r-- | libavcodec/ac3tab.c | 2 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 52 | ||||
-rw-r--r-- | libavcodec/dsputil.c | 14 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 14 | ||||
-rw-r--r-- | libavcodec/version.h | 75 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 40 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_yasm.asm | 126 |
11 files changed, 275 insertions, 66 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 4d3fbe1771..ef91ee3159 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -3,7 +3,7 @@ include $(SUBDIR)../config.mak NAME = avcodec FFLIBS = avutil -HEADERS = avcodec.h avfft.h dxva2.h opt.h vaapi.h vdpau.h xvmc.h +HEADERS = avcodec.h avfft.h dxva2.h opt.h vaapi.h vdpau.h version.h xvmc.h OBJS = allcodecs.o \ audioconvert.o \ diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index 4413e5bd6b..72b0291727 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -167,7 +167,7 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, static void mdct512(AC3MDCTContext *mdct, CoefType *out, SampleType *in); static void apply_window(DSPContext *dsp, SampleType *output, const SampleType *input, - const SampleType *window, int n); + const SampleType *window, unsigned int len); static int normalize_samples(AC3EncodeContext *s); diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c index d1bb429ccc..f682aa625f 100644 --- a/libavcodec/ac3enc_fixed.c +++ b/libavcodec/ac3enc_fixed.c @@ -252,15 +252,9 @@ static void mdct512(AC3MDCTContext *mdct, int32_t *out, int16_t *in) * Apply KBD window to input samples prior to MDCT. */ static void apply_window(DSPContext *dsp, int16_t *output, const int16_t *input, - const int16_t *window, int n) + const int16_t *window, unsigned int len) { - int i; - int n2 = n >> 1; - - for (i = 0; i < n2; i++) { - output[i] = MUL16(input[i], window[i]) >> 15; - output[n-i-1] = MUL16(input[n-i-1], window[i]) >> 15; - } + dsp->apply_window_int16(output, input, window, len); } diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index e0783f9aa3..f5b01f7d6f 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -83,9 +83,9 @@ static void mdct512(AC3MDCTContext *mdct, float *out, float *in) * Apply KBD window to input samples prior to MDCT. */ static void apply_window(DSPContext *dsp, float *output, const float *input, - const float *window, int n) + const float *window, unsigned int len) { - dsp->vector_fmul(output, input, window, n); + dsp->vector_fmul(output, input, window, len); } diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c index f7b030209c..fb8e9c7b52 100644 --- a/libavcodec/ac3tab.c +++ b/libavcodec/ac3tab.c @@ -141,7 +141,7 @@ const uint8_t ff_ac3_rematrix_band_tab[5] = { 13, 25, 37, 61, 253 }; /* AC-3 MDCT window */ /* MDCT window */ -const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2] = { +DECLARE_ALIGNED(16, const int16_t, ff_ac3_window)[AC3_WINDOW_SIZE/2] = { 4, 7, 12, 16, 21, 28, 34, 42, 51, 61, 72, 84, 97, 111, 127, 145, 164, 184, 207, 231, 257, 285, 315, 347, diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 0f9201d8e4..5a9411f633 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -31,57 +31,7 @@ #include "libavutil/avutil.h" #include "libavutil/cpu.h" -#define LIBAVCODEC_VERSION_MAJOR 52 -#define LIBAVCODEC_VERSION_MINOR 114 -#define LIBAVCODEC_VERSION_MICRO 0 - -#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ - LIBAVCODEC_VERSION_MINOR, \ - LIBAVCODEC_VERSION_MICRO) -#define LIBAVCODEC_VERSION AV_VERSION(LIBAVCODEC_VERSION_MAJOR, \ - LIBAVCODEC_VERSION_MINOR, \ - LIBAVCODEC_VERSION_MICRO) -#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT - -#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) - -/** - * Those FF_API_* defines are not part of public API. - * They may change, break or disappear at any time. - */ -#ifndef FF_API_PALETTE_CONTROL -#define FF_API_PALETTE_CONTROL (LIBAVCODEC_VERSION_MAJOR < 54) -#endif -#ifndef FF_API_MM_FLAGS -#define FF_API_MM_FLAGS (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_OPT_SHOW -#define FF_API_OPT_SHOW (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_AUDIO_OLD -#define FF_API_AUDIO_OLD (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_VIDEO_OLD -#define FF_API_VIDEO_OLD (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_SUBTITLE_OLD -#define FF_API_SUBTITLE_OLD (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_USE_LPC -#define FF_API_USE_LPC (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_SET_STRING_OLD -#define FF_API_SET_STRING_OLD (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_INOFFICIAL -#define FF_API_INOFFICIAL (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_OLD_SAMPLE_FMT -#define FF_API_OLD_SAMPLE_FMT (LIBAVCODEC_VERSION_MAJOR < 53) -#endif -#ifndef FF_API_OLD_AUDIOCONVERT -#define FF_API_OLD_AUDIOCONVERT (LIBAVCODEC_VERSION_MAJOR < 53) -#endif +#include "libavcodec/version.h" #if LIBAVCODEC_VERSION_MAJOR < 53 # define FF_INTERNALC_MEM_TYPE unsigned int diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index d4e538cd07..951236c8c3 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3895,6 +3895,19 @@ static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, co return res; } +static void apply_window_int16_c(int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len) +{ + int i; + int len2 = len >> 1; + + for (i = 0; i < len2; i++) { + int16_t w = window[i]; + output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15; + output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15; + } +} + #define W0 2048 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ @@ -4369,6 +4382,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->vector_clipf = vector_clipf_c; c->scalarproduct_int16 = scalarproduct_int16_c; c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; + c->apply_window_int16 = apply_window_int16_c; c->scalarproduct_float = scalarproduct_float_c; c->butterflies_float = butterflies_float_c; c->vector_fmul_scalar = vector_fmul_scalar_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index d42abe8956..4f1f650dc8 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -526,6 +526,20 @@ typedef struct DSPContext { */ int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, const int16_t *v2, const int16_t *v3, int len, int mul); + /** + * Apply symmetric window in 16-bit fixed-point. + * @param output destination array + * constraints: 16-byte aligned + * @param input source array + * constraints: 16-byte aligned + * @param window window array + * constraints: 16-byte aligned, at least len/2 elements + * @param len full window length + * constraints: multiple of ? greater than zero + */ + void (*apply_window_int16)(int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); + /* rv30 functions */ qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; diff --git a/libavcodec/version.h b/libavcodec/version.h new file mode 100644 index 0000000000..fe608c78db --- /dev/null +++ b/libavcodec/version.h @@ -0,0 +1,75 @@ +/* + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VERSION_H +#define AVCODEC_VERSION_H + +#define LIBAVCODEC_VERSION_MAJOR 52 +#define LIBAVCODEC_VERSION_MINOR 114 +#define LIBAVCODEC_VERSION_MICRO 0 + +#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ + LIBAVCODEC_VERSION_MINOR, \ + LIBAVCODEC_VERSION_MICRO) +#define LIBAVCODEC_VERSION AV_VERSION(LIBAVCODEC_VERSION_MAJOR, \ + LIBAVCODEC_VERSION_MINOR, \ + LIBAVCODEC_VERSION_MICRO) +#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT + +#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) + +/** + * Those FF_API_* defines are not part of public API. + * They may change, break or disappear at any time. + */ +#ifndef FF_API_PALETTE_CONTROL +#define FF_API_PALETTE_CONTROL (LIBAVCODEC_VERSION_MAJOR < 54) +#endif +#ifndef FF_API_MM_FLAGS +#define FF_API_MM_FLAGS (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_OPT_SHOW +#define FF_API_OPT_SHOW (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_AUDIO_OLD +#define FF_API_AUDIO_OLD (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_VIDEO_OLD +#define FF_API_VIDEO_OLD (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_SUBTITLE_OLD +#define FF_API_SUBTITLE_OLD (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_USE_LPC +#define FF_API_USE_LPC (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_SET_STRING_OLD +#define FF_API_SET_STRING_OLD (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_INOFFICIAL +#define FF_API_INOFFICIAL (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_OLD_SAMPLE_FMT +#define FF_API_OLD_SAMPLE_FMT (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_OLD_AUDIOCONVERT +#define FF_API_OLD_AUDIOCONVERT (LIBAVCODEC_VERSION_MAJOR < 53) +#endif + +#endif /* AVCODEC_VERSION_H */ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 3272556a74..c163a16848 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2393,6 +2393,20 @@ int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int or int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); + +void ff_apply_window_int16_mmxext (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_mmxext_ba (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_sse2 (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_sse2_ba (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_ssse3 (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); + void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); @@ -2754,6 +2768,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_YASM c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; + if (avctx->flags & CODEC_FLAG_BITEXACT) { + c->apply_window_int16 = ff_apply_window_int16_mmxext_ba; + } else { + c->apply_window_int16 = ff_apply_window_int16_mmxext; + } #endif } if(mm_flags & AV_CPU_FLAG_SSE){ @@ -2776,13 +2795,30 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_YASM c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; + if (avctx->flags & CODEC_FLAG_BITEXACT) { + c->apply_window_int16 = ff_apply_window_int16_sse2_ba; + } else { + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { + c->apply_window_int16 = ff_apply_window_int16_sse2; + } + } c->emulated_edge_mc = emulated_edge_mc_sse; c->gmc= gmc_sse; #endif } - if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit - c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; + if (mm_flags & AV_CPU_FLAG_SSSE3) { +#if HAVE_YASM + if (mm_flags & AV_CPU_FLAG_ATOM) { + c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; + } else { + c->apply_window_int16 = ff_apply_window_int16_ssse3; + } + if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) { // cachesplit + c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; + } +#endif + } } if (CONFIG_ENCODERS) diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index 53884a6b40..4658e5e3fa 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -27,6 +27,8 @@ pb_zzzzzzzz77777777: times 8 db -1 pb_7: times 8 db 7 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 +pb_revwords: db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 +pd_16384: times 4 dd 16384 section .text align=16 @@ -202,6 +204,130 @@ SCALARPRODUCT_LOOP 0 RET +;----------------------------------------------------------------------------- +; void ff_apply_window_int16(int16_t *output, const int16_t *input, +; const int16_t *window, unsigned int len) +;----------------------------------------------------------------------------- + +%macro REVERSE_WORDS_MMXEXT 1-2 + pshufw %1, %1, 0x1B +%endmacro + +%macro REVERSE_WORDS_SSE2 1-2 + pshuflw %1, %1, 0x1B + pshufhw %1, %1, 0x1B + pshufd %1, %1, 0x4E +%endmacro + +%macro REVERSE_WORDS_SSSE3 2 + pshufb %1, %2 +%endmacro + +; dst = (dst * src) >> 15 +; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back +; in from the pmullw result. +%macro MUL16FIXED_MMXEXT 3 ; dst, src, temp + mova %3, %1 + pmulhw %1, %2 + pmullw %3, %2 + psrlw %3, 15 + psllw %1, 1 + por %1, %3 +%endmacro + +; dst = ((dst * src) + (1<<14)) >> 15 +%macro MUL16FIXED_SSSE3 3 ; dst, src, unused + pmulhrsw %1, %2 +%endmacro + +%macro APPLY_WINDOW_INT16 3 ; %1=instruction set, %2=mmxext/sse2 bit exact version, %3=has_ssse3 +cglobal apply_window_int16_%1, 4,5,6, output, input, window, offset, offset2 + lea offset2q, [offsetq-mmsize] +%if %2 + mova m5, [pd_16384] +%elifidn %1, ssse3 + mova m5, [pb_revwords] + ALIGN 16 +%endif +.loop: +%if %2 + ; This version expands 16-bit to 32-bit, multiplies by the window, + ; adds 16384 for rounding, right shifts 15, then repacks back to words to + ; save to the output. The window is reversed for the second half. + mova m3, [windowq+offset2q] + mova m4, [ inputq+offset2q] + pxor m0, m0 + punpcklwd m0, m3 + punpcklwd m1, m4 + pmaddwd m0, m1 + paddd m0, m5 + psrad m0, 15 + pxor m2, m2 + punpckhwd m2, m3 + punpckhwd m1, m4 + pmaddwd m2, m1 + paddd m2, m5 + psrad m2, 15 + packssdw m0, m2 + mova [outputq+offset2q], m0 + REVERSE_WORDS m3 + mova m4, [ inputq+offsetq] + pxor m0, m0 + punpcklwd m0, m3 + punpcklwd m1, m4 + pmaddwd m0, m1 + paddd m0, m5 + psrad m0, 15 + pxor m2, m2 + punpckhwd m2, m3 + punpckhwd m1, m4 + pmaddwd m2, m1 + paddd m2, m5 + psrad m2, 15 + packssdw m0, m2 + mova [outputq+offsetq], m0 +%elif %3 + ; This version does the 16x16->16 multiplication in-place without expanding + ; to 32-bit. The ssse3 version is bit-identical. + mova m0, [windowq+offset2q] + mova m1, [ inputq+offset2q] + pmulhrsw m1, m0 + REVERSE_WORDS m0, m5 + pmulhrsw m0, [ inputq+offsetq ] + mova [outputq+offset2q], m1 + mova [outputq+offsetq ], m0 +%else + ; This version does the 16x16->16 multiplication in-place without expanding + ; to 32-bit. The mmxext and sse2 versions do not use rounding, and + ; therefore are not bit-identical to the C version. + mova m0, [windowq+offset2q] + mova m1, [ inputq+offset2q] + mova m2, [ inputq+offsetq ] + MUL16FIXED m1, m0, m3 + REVERSE_WORDS m0 + MUL16FIXED m2, m0, m3 + mova [outputq+offset2q], m1 + mova [outputq+offsetq ], m2 +%endif + add offsetd, mmsize + sub offset2d, mmsize + jae .loop + REP_RET +%endmacro + +INIT_MMX +%define REVERSE_WORDS REVERSE_WORDS_MMXEXT +%define MUL16FIXED MUL16FIXED_MMXEXT +APPLY_WINDOW_INT16 mmxext, 0, 0 +APPLY_WINDOW_INT16 mmxext_ba, 1, 0 +INIT_XMM +%define REVERSE_WORDS REVERSE_WORDS_SSE2 +APPLY_WINDOW_INT16 sse2, 0, 0 +APPLY_WINDOW_INT16 sse2_ba, 1, 0 +APPLY_WINDOW_INT16 ssse3_atom, 0, 1 +%define REVERSE_WORDS REVERSE_WORDS_SSSE3 +APPLY_WINDOW_INT16 ssse3, 0, 1 + ; void add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top |