diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-06-22 17:58:28 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-06-22 17:58:28 +0200 |
commit | 99497b4683e5054bcdc5b6802a27d717df9e04f3 (patch) | |
tree | 130022374c1a92b72288272bd0927ae6ac7d825b /libavcodec | |
parent | 0dae193d3ecf5d0dc687f5ad708419bf7600de9a (diff) | |
parent | 9a9e2f1c8aa4539a261625145e5c1f46a8106ac2 (diff) | |
download | ffmpeg-99497b4683e5054bcdc5b6802a27d717df9e04f3.tar.gz |
Merge commit '9a9e2f1c8aa4539a261625145e5c1f46a8106ac2'
* commit '9a9e2f1c8aa4539a261625145e5c1f46a8106ac2':
dsputil: Split audio operations off into a separate context
Conflicts:
configure
libavcodec/takdec.c
libavcodec/x86/Makefile
libavcodec/x86/dsputil.asm
libavcodec/x86/dsputil_init.c
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_x86.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
40 files changed, 657 insertions, 384 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3ff073d958..253ede9306 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -33,6 +33,7 @@ OBJS = allcodecs.o \ OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o OBJS-$(CONFIG_AC3DSP) += ac3dsp.o OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o +OBJS-$(CONFIG_AUDIODSP) += audiodsp.o OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o OBJS-$(CONFIG_CABAC) += cabac.o OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index 9bc22bede4..dc974702d6 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -37,6 +37,7 @@ #include "libavutil/opt.h" #include "avcodec.h" #include "put_bits.h" +#include "audiodsp.h" #include "ac3dsp.h" #include "ac3.h" #include "fft.h" @@ -2478,6 +2479,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx) if (ret) goto init_fail; + ff_audiodsp_init(&s->adsp); ff_dsputil_init(&s->dsp, avctx); ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h index 13490340b3..0ce44bab1c 100644 --- a/libavcodec/ac3enc.h +++ b/libavcodec/ac3enc.h @@ -39,6 +39,7 @@ #include "fft.h" #include "mathops.h" #include "put_bits.h" +#include "audiodsp.h" #ifndef CONFIG_AC3ENC_FLOAT #define CONFIG_AC3ENC_FLOAT 0 @@ -162,6 +163,7 @@ typedef struct AC3EncodeContext { AVCodecContext *avctx; ///< parent AVCodecContext PutBitContext pb; ///< bitstream writer context DSPContext dsp; + AudioDSPContext adsp; AVFloatDSPContext fdsp; AC3DSPContext ac3dsp; ///< AC-3 optimized functions FFTContext mdct; ///< FFT context for MDCT calculation diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c index 3994c17d3b..9d39026dd5 100644 --- a/libavcodec/ac3enc_fixed.c +++ b/libavcodec/ac3enc_fixed.c @@ -29,6 +29,7 @@ #define FFT_FLOAT 0 #undef CONFIG_AC3ENC_FLOAT #include "internal.h" +#include "audiodsp.h" #include "ac3enc.h" #include "eac3enc.h" @@ -111,9 +112,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, int64_t sum[4], /* * Clip MDCT coefficients to allowable range. */ -static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len) +static void clip_coefficients(AudioDSPContext *adsp, int32_t *coef, + unsigned int len) { - dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); + adsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); } diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index fca95b1819..fa6e50981b 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -28,6 +28,7 @@ #define CONFIG_AC3ENC_FLOAT 1 #include "internal.h" +#include "audiodsp.h" #include "ac3enc.h" #include "eac3enc.h" #include "kbdwin.h" @@ -117,9 +118,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4], /* * Clip MDCT coefficients to allowable range. */ -static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len) +static void clip_coefficients(AudioDSPContext *adsp, float *coef, + unsigned int len) { - dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); + adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); } diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c index 4527519175..192d16f57e 100644 --- a/libavcodec/ac3enc_template.c +++ b/libavcodec/ac3enc_template.c @@ -30,6 +30,8 @@ #include "libavutil/attributes.h" #include "libavutil/internal.h" + +#include "audiodsp.h" #include "internal.h" #include "ac3enc.h" #include "eac3enc.h" @@ -40,7 +42,8 @@ static void scale_coefficients(AC3EncodeContext *s); static int normalize_samples(AC3EncodeContext *s); -static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len); +static void clip_coefficients(AudioDSPContext *adsp, CoefType *coef, + unsigned int len); static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl); @@ -164,7 +167,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) } /* coefficients must be clipped in order to be encoded */ - clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs); + clip_coefficients(&s->adsp, cpl_coef, num_cpl_coefs); } /* calculate energy in each band in coupling channel and each fbw channel */ @@ -407,7 +410,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt, if (s->fixed_point) scale_coefficients(s); - clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1], + clip_coefficients(&s->adsp, s->blocks[0].mdct_coef[1], AC3_MAX_COEFS * s->num_blocks * s->channels); s->cpl_on = s->cpl_enabled; diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c index c005c4b4e8..3ecec01cbe 100644 --- a/libavcodec/acelp_pitch_delay.c +++ b/libavcodec/acelp_pitch_delay.c @@ -27,6 +27,7 @@ #include "avcodec.h" #include "acelp_pitch_delay.h" #include "celp_math.h" +#include "audiodsp.h" int ff_acelp_decode_8bit_to_1st_delay3(int ac_index) { @@ -91,7 +92,7 @@ void ff_acelp_update_past_gain( } int16_t ff_acelp_decode_gain_code( - DSPContext *dsp, + AudioDSPContext *adsp, int gain_corr_factor, const int16_t* fc_v, int mr_energy, @@ -118,7 +119,7 @@ int16_t ff_acelp_decode_gain_code( ); #else mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) / - sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size)); + sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size)); return mr_energy >> 12; #endif } diff --git a/libavcodec/acelp_pitch_delay.h b/libavcodec/acelp_pitch_delay.h index 72977f1f49..2aade2f226 100644 --- a/libavcodec/acelp_pitch_delay.h +++ b/libavcodec/acelp_pitch_delay.h @@ -24,7 +24,8 @@ #define AVCODEC_ACELP_PITCH_DELAY_H #include <stdint.h> -#include "dsputil.h" + +#include "audiodsp.h" #define PITCH_DELAY_MIN 20 #define PITCH_DELAY_MAX 143 @@ -139,7 +140,7 @@ void ff_acelp_update_past_gain( /** * @brief Decode the adaptive codebook gain and add * correction (4.1.5 and 3.9.1 of G.729). - * @param dsp initialized dsputil context + * @param adsp initialized audio DSP context * @param gain_corr_factor gain correction factor (2.13) * @param fc_v fixed-codebook vector (2.13) * @param mr_energy mean innovation energy and fixed-point correction (7.13) @@ -208,7 +209,7 @@ void ff_acelp_update_past_gain( * @remark The routine is used in G.729 and AMR (all modes). */ int16_t ff_acelp_decode_gain_code( - DSPContext *dsp, + AudioDSPContext *adsp, int gain_corr_factor, const int16_t* fc_v, int mr_energy, diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index ed2306a4ec..66a214028e 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ arm/ac3dsp_arm.o +OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ arm/dsputil_arm.o \ @@ -80,11 +81,13 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ NEON-OBJS += arm/fmtconvert_neon.o NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o +NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \ + arm/audiodsp_neon.o \ + arm/int_neon.o NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ arm/blockdsp_neon.o NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ arm/dsputil_neon.o \ - arm/int_neon.o \ arm/simple_idct_neon.o NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ arm/fft_fixed_neon.o diff --git a/libavcodec/arm/audiodsp_arm.h b/libavcodec/arm/audiodsp_arm.h new file mode 100644 index 0000000000..213660dae7 --- /dev/null +++ b/libavcodec/arm/audiodsp_arm.h @@ -0,0 +1,26 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_ARM_AUDIODSP_ARM_H +#define AVCODEC_ARM_AUDIODSP_ARM_H + +#include "libavcodec/audiodsp.h" + +void ff_audiodsp_init_neon(AudioDSPContext *c); + +#endif /* AVCODEC_ARM_AUDIODSP_ARM_H */ diff --git a/libavcodec/arm/audiodsp_init_arm.c b/libavcodec/arm/audiodsp_init_arm.c new file mode 100644 index 0000000000..74aa52a4ef --- /dev/null +++ b/libavcodec/arm/audiodsp_init_arm.c @@ -0,0 +1,33 @@ +/* + * ARM optimized audio functions + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/audiodsp.h" +#include "audiodsp_arm.h" + +av_cold void ff_audiodsp_init_arm(AudioDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) + ff_audiodsp_init_neon(c); +} diff --git a/libavcodec/arm/audiodsp_init_neon.c b/libavcodec/arm/audiodsp_init_neon.c new file mode 100644 index 0000000000..f7bd162482 --- /dev/null +++ b/libavcodec/arm/audiodsp_init_neon.c @@ -0,0 +1,41 @@ +/* + * ARM NEON optimised audio functions + * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavcodec/audiodsp.h" +#include "audiodsp_arm.h" + +void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, + int len); +void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); + +int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); + +av_cold void ff_audiodsp_init_neon(AudioDSPContext *c) +{ + c->vector_clip_int32 = ff_vector_clip_int32_neon; + c->vector_clipf = ff_vector_clipf_neon; + + c->scalarproduct_int16 = ff_scalarproduct_int16_neon; +} diff --git a/libavcodec/arm/audiodsp_neon.S b/libavcodec/arm/audiodsp_neon.S new file mode 100644 index 0000000000..ab32cef7ab --- /dev/null +++ b/libavcodec/arm/audiodsp_neon.S @@ -0,0 +1,64 @@ +/* + * ARM NEON optimised audio functions + * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +function ff_vector_clipf_neon, export=1 +VFP vdup.32 q1, d0[1] +VFP vdup.32 q0, d0[0] +NOVFP vdup.32 q0, r2 +NOVFP vdup.32 q1, r3 +NOVFP ldr r2, [sp] + vld1.f32 {q2},[r1,:128]! + vmin.f32 q10, q2, q1 + vld1.f32 {q3},[r1,:128]! + vmin.f32 q11, q3, q1 +1: vmax.f32 q8, q10, q0 + vmax.f32 q9, q11, q0 + subs r2, r2, #8 + beq 2f + vld1.f32 {q2},[r1,:128]! + vmin.f32 q10, q2, q1 + vld1.f32 {q3},[r1,:128]! + vmin.f32 q11, q3, q1 + vst1.f32 {q8},[r0,:128]! + vst1.f32 {q9},[r0,:128]! + b 1b +2: vst1.f32 {q8},[r0,:128]! + vst1.f32 {q9},[r0,:128]! + bx lr +endfunc + +function ff_vector_clip_int32_neon, export=1 + vdup.32 q0, r2 + vdup.32 q1, r3 + ldr r2, [sp] +1: + vld1.32 {q2-q3}, [r1,:128]! + vmin.s32 q2, q2, q1 + vmin.s32 q3, q3, q1 + vmax.s32 q2, q2, q0 + vmax.s32 q3, q3, q0 + vst1.32 {q2-q3}, [r0,:128]! + subs r2, r2, #8 + bgt 1b + bx lr +endfunc diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 797983c76c..cf4017f236 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -34,13 +34,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); -void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, - int len); -void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, - int32_t max, unsigned int len); - -int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); - av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { @@ -58,9 +51,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, c->add_pixels_clamped = ff_add_pixels_clamped_neon; c->put_pixels_clamped = ff_put_pixels_clamped_neon; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; - - c->vector_clipf = ff_vector_clipf_neon; - c->vector_clip_int32 = ff_vector_clip_int32_neon; - - c->scalarproduct_int16 = ff_scalarproduct_int16_neon; } diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index a8e1db5ca1..4a2fce0005 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -126,45 +126,3 @@ function ff_add_pixels_clamped_neon, export=1 vst1.8 {d6}, [r3,:64], r2 bx lr endfunc - -function ff_vector_clipf_neon, export=1 -VFP vdup.32 q1, d0[1] -VFP vdup.32 q0, d0[0] -NOVFP vdup.32 q0, r2 -NOVFP vdup.32 q1, r3 -NOVFP ldr r2, [sp] - vld1.f32 {q2},[r1,:128]! - vmin.f32 q10, q2, q1 - vld1.f32 {q3},[r1,:128]! - vmin.f32 q11, q3, q1 -1: vmax.f32 q8, q10, q0 - vmax.f32 q9, q11, q0 - subs r2, r2, #8 - beq 2f - vld1.f32 {q2},[r1,:128]! - vmin.f32 q10, q2, q1 - vld1.f32 {q3},[r1,:128]! - vmin.f32 q11, q3, q1 - vst1.f32 {q8},[r0,:128]! - vst1.f32 {q9},[r0,:128]! - b 1b -2: vst1.f32 {q8},[r0,:128]! - vst1.f32 {q9},[r0,:128]! - bx lr -endfunc - -function ff_vector_clip_int32_neon, export=1 - vdup.32 q0, r2 - vdup.32 q1, r3 - ldr r2, [sp] -1: - vld1.32 {q2-q3}, [r1,:128]! - vmin.s32 q2, q2, q1 - vmin.s32 q3, q3, q1 - vmax.s32 q2, q2, q0 - vmax.s32 q3, q3, q0 - vst1.32 {q2-q3}, [r0,:128]! - subs r2, r2, #8 - bgt 1b - bx lr -endfunc diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c new file mode 100644 index 0000000000..85b5a74947 --- /dev/null +++ b/libavcodec/audiodsp.c @@ -0,0 +1,118 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavutil/common.h" +#include "audiodsp.h" + +static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, + uint32_t maxi, uint32_t maxisign) +{ + if (a > mini) + return mini; + else if ((a ^ (1U << 31)) > maxisign) + return maxi; + else + return a; +} + +static void vector_clipf_c_opposite_sign(float *dst, const float *src, + float *min, float *max, int len) +{ + int i; + uint32_t mini = *(uint32_t *) min; + uint32_t maxi = *(uint32_t *) max; + uint32_t maxisign = maxi ^ (1U << 31); + uint32_t *dsti = (uint32_t *) dst; + const uint32_t *srci = (const uint32_t *) src; + + for (i = 0; i < len; i += 8) { + dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign); + dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign); + dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign); + dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign); + dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign); + dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign); + dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign); + dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign); + } +} + +static void vector_clipf_c(float *dst, const float *src, + float min, float max, int len) +{ + int i; + + if (min < 0 && max > 0) { + vector_clipf_c_opposite_sign(dst, src, &min, &max, len); + } else { + for (i = 0; i < len; i += 8) { + dst[i] = av_clipf(src[i], min, max); + dst[i + 1] = av_clipf(src[i + 1], min, max); + dst[i + 2] = av_clipf(src[i + 2], min, max); + dst[i + 3] = av_clipf(src[i + 3], min, max); + dst[i + 4] = av_clipf(src[i + 4], min, max); + dst[i + 5] = av_clipf(src[i + 5], min, max); + dst[i + 6] = av_clipf(src[i + 6], min, max); + dst[i + 7] = av_clipf(src[i + 7], min, max); + } + } +} + +static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, + int order) +{ + int res = 0; + + while (order--) + res += *v1++ **v2++; + + return res; +} + +static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len) +{ + do { + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + len -= 8; + } while (len > 0); +} + +av_cold void ff_audiodsp_init(AudioDSPContext *c) +{ + c->scalarproduct_int16 = scalarproduct_int16_c; + c->vector_clip_int32 = vector_clip_int32_c; + c->vector_clipf = vector_clipf_c; + + if (ARCH_ARM) + ff_audiodsp_init_arm(c); + if (ARCH_PPC) + ff_audiodsp_init_ppc(c); + if (ARCH_X86) + ff_audiodsp_init_x86(c); +} diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h new file mode 100644 index 0000000000..b55bf858e0 --- /dev/null +++ b/libavcodec/audiodsp.h @@ -0,0 +1,59 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_AUDIODSP_H +#define AVCODEC_AUDIODSP_H + +#include <stdint.h> + +typedef struct AudioDSPContext { + /** + * Calculate scalar product of two vectors. + * @param len length of vectors, should be multiple of 16 + */ + int32_t (*scalarproduct_int16)(const int16_t *v1, + const int16_t *v2 /* align 16 */, int len); + + /** + * Clip each element in an array of int32_t to a given minimum and + * maximum value. + * @param dst destination array + * constraints: 16-byte aligned + * @param src source array + * constraints: 16-byte aligned + * @param min minimum value + * constraints: must be in the range [-(1 << 24), 1 << 24] + * @param max maximum value + * constraints: must be in the range [-(1 << 24), 1 << 24] + * @param len number of elements in the array + * constraints: multiple of 32 greater than zero + */ + void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); + /* assume len is a multiple of 8, and arrays are 16-byte aligned */ + void (*vector_clipf)(float *dst /* align 16 */, + const float *src /* align 16 */, + float min, float max, int len /* align 16 */); +} AudioDSPContext; + +void ff_audiodsp_init(AudioDSPContext *c); +void ff_audiodsp_init_arm(AudioDSPContext *c); +void ff_audiodsp_init_ppc(AudioDSPContext *c); +void ff_audiodsp_init_x86(AudioDSPContext *c); + +#endif /* AVCODEC_AUDIODSP_H */ diff --git a/libavcodec/cook.c b/libavcodec/cook.c index d84d755dea..5860288e04 100644 --- a/libavcodec/cook.c +++ b/libavcodec/cook.c @@ -44,9 +44,10 @@ #include "libavutil/channel_layout.h" #include "libavutil/lfg.h" + +#include "audiodsp.h" #include "avcodec.h" #include "get_bits.h" -#include "dsputil.h" #include "bytestream.h" #include "fft.h" #include "internal.h" @@ -123,7 +124,7 @@ typedef struct cook { void (*saturate_output)(struct cook *q, float *out); AVCodecContext* avctx; - DSPContext dsp; + AudioDSPContext adsp; GetBitContext gb; /* stream data */ int num_vectors; @@ -873,8 +874,8 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p, */ static void saturate_output_float(COOKContext *q, float *out) { - q->dsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, - -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); + q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, + -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); } @@ -1072,7 +1073,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx) /* Initialize RNG. */ av_lfg_init(&q->random_state, 0); - ff_dsputil_init(&q->dsp, avctx); + ff_audiodsp_init(&q->adsp); while (edata_ptr < edata_ptr_end) { /* 8 for mono, 16 for stereo, ? for multichannel diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 1739359f47..ebd01bf8cc 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -1345,87 +1345,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WRAPPER8_16_SQ(rd8x8_c, rd16_c) WRAPPER8_16_SQ(bit8x8_c, bit16_c) -static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, - uint32_t maxi, uint32_t maxisign) -{ - if (a > mini) - return mini; - else if ((a ^ (1U << 31)) > maxisign) - return maxi; - else - return a; -} - -static void vector_clipf_c_opposite_sign(float *dst, const float *src, - float *min, float *max, int len) -{ - int i; - uint32_t mini = *(uint32_t *) min; - uint32_t maxi = *(uint32_t *) max; - uint32_t maxisign = maxi ^ (1U << 31); - uint32_t *dsti = (uint32_t *) dst; - const uint32_t *srci = (const uint32_t *) src; - - for (i = 0; i < len; i += 8) { - dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign); - dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign); - dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign); - dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign); - dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign); - dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign); - dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign); - dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign); - } -} - -static void vector_clipf_c(float *dst, const float *src, - float min, float max, int len) -{ - int i; - - if (min < 0 && max > 0) { - vector_clipf_c_opposite_sign(dst, src, &min, &max, len); - } else { - for (i = 0; i < len; i += 8) { - dst[i] = av_clipf(src[i], min, max); - dst[i + 1] = av_clipf(src[i + 1], min, max); - dst[i + 2] = av_clipf(src[i + 2], min, max); - dst[i + 3] = av_clipf(src[i + 3], min, max); - dst[i + 4] = av_clipf(src[i + 4], min, max); - dst[i + 5] = av_clipf(src[i + 5], min, max); - dst[i + 6] = av_clipf(src[i + 6], min, max); - dst[i + 7] = av_clipf(src[i + 7], min, max); - } - } -} - -static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, - int order) -{ - int res = 0; - - while (order--) - res += *v1++ **v2++; - - return res; -} - -static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, - int32_t max, unsigned int len) -{ - do { - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - len -= 8; - } while (len > 0); -} - static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block) { ff_j_rev_dct(block); @@ -1661,10 +1580,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->try_8x8basis = try_8x8basis_c; c->add_8x8basis = add_8x8basis_c; - c->scalarproduct_int16 = scalarproduct_int16_c; - c->vector_clip_int32 = vector_clip_int32_c; - c->vector_clipf = vector_clipf_c; - c->shrink[0] = av_image_copy_plane; c->shrink[1] = ff_shrink22; c->shrink[2] = ff_shrink44; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 63a2684763..cbd19b7bbd 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -140,11 +140,6 @@ typedef struct DSPContext { void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); - /* assume len is a multiple of 8, and arrays are 16-byte aligned */ - void (*vector_clipf)(float *dst /* align 16 */, - const float *src /* align 16 */, - float min, float max, int len /* align 16 */); - /* (I)DCT */ void (*fdct)(int16_t *block /* align 16 */); void (*fdct248)(int16_t *block /* align 16 */); @@ -204,30 +199,6 @@ typedef struct DSPContext { void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); - - /** - * Calculate scalar product of two vectors. - * @param len length of vectors, should be multiple of 16 - */ - int32_t (*scalarproduct_int16)(const int16_t *v1, - const int16_t *v2 /* align 16 */, int len); - - /** - * Clip each element in an array of int32_t to a given minimum and - * maximum value. - * @param dst destination array - * constraints: 16-byte aligned - * @param src source array - * constraints: 16-byte aligned - * @param min minimum value - * constraints: must be in the range [-(1 << 24), 1 << 24] - * @param max maximum value - * constraints: must be in the range [-(1 << 24), 1 << 24] - * @param len number of elements in the array - * constraints: multiple of 32 greater than zero - */ - void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, - int32_t max, unsigned int len); } DSPContext; void ff_dsputil_static_init(void); diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c index d29ad1f502..6eb057f5d8 100644 --- a/libavcodec/g729dec.c +++ b/libavcodec/g729dec.c @@ -25,7 +25,7 @@ #include "avcodec.h" #include "libavutil/avutil.h" #include "get_bits.h" -#include "dsputil.h" +#include "audiodsp.h" #include "internal.h" @@ -100,7 +100,7 @@ typedef struct { } G729FormatDescription; typedef struct { - DSPContext dsp; + AudioDSPContext adsp; /// past excitation signal buffer int16_t exc_base[2*SUBFRAME_SIZE+PITCH_DELAY_MAX+INTERPOL_LEN]; @@ -381,8 +381,8 @@ static av_cold int decoder_init(AVCodecContext * avctx) for(i=0; i<4; i++) ctx->quant_energy[i] = -14336; // -14 in (5.10) - ff_dsputil_init(&ctx->dsp, avctx); - ctx->dsp.scalarproduct_int16 = scalarproduct_int16_c; + ff_audiodsp_init(&ctx->adsp); + ctx->adsp.scalarproduct_int16 = scalarproduct_int16_c; return 0; } @@ -578,7 +578,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr, } /* Decode the fixed-codebook gain. */ - ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->dsp, gain_corr_factor, + ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->adsp, gain_corr_factor, fc, MR_ENERGY, ctx->quant_energy, ma_prediction_coeff, @@ -668,7 +668,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr, /* Call postfilter and also update voicing decision for use in next frame. */ ff_g729_postfilter( - &ctx->dsp, + &ctx->adsp, &ctx->ht_prev_data, &is_periodic, &lp[i][0], diff --git a/libavcodec/g729postfilter.c b/libavcodec/g729postfilter.c index bcf509cfcc..9a775c47b2 100644 --- a/libavcodec/g729postfilter.c +++ b/libavcodec/g729postfilter.c @@ -107,7 +107,7 @@ static void residual_filter(int16_t* out, const int16_t* filter_coeffs, const in * * \return 0 if long-term prediction gain is less than 3dB, 1 - otherwise */ -static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, +static int16_t long_term_filter(AudioDSPContext *adsp, int pitch_delay_int, const int16_t* residual, int16_t *residual_filt, int subframe_size) { @@ -161,7 +161,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, /* Start of best delay searching code */ gain_num = 0; - ener = dsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE, + ener = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE, sig_scaled + RES_PREV_DATA_SIZE, subframe_size); if (ener) { @@ -190,7 +190,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, corr_int_num = 0; best_delay_int = pitch_delay_int - 1; for (i = pitch_delay_int - 1; i <= pitch_delay_int + 1; i++) { - sum = dsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE, + sum = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE, sig_scaled + RES_PREV_DATA_SIZE - i, subframe_size); if (sum > corr_int_num) { @@ -200,7 +200,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, } if (corr_int_num) { /* Compute denominator of pseudo-normalized correlation R'(0). */ - corr_int_den = dsp->scalarproduct_int16(sig_scaled - best_delay_int + RES_PREV_DATA_SIZE, + corr_int_den = adsp->scalarproduct_int16(sig_scaled - best_delay_int + RES_PREV_DATA_SIZE, sig_scaled - best_delay_int + RES_PREV_DATA_SIZE, subframe_size); @@ -227,7 +227,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, Also compute maximum value of above denominators over all k. */ tmp = corr_int_den; for (k = 0; k < ANALYZED_FRAC_DELAYS; k++) { - sum = dsp->scalarproduct_int16(&delayed_signal[k][1], + sum = adsp->scalarproduct_int16(&delayed_signal[k][1], &delayed_signal[k][1], subframe_size - 1); corr_den[k][0] = sum + delayed_signal[k][0 ] * delayed_signal[k][0 ]; @@ -255,7 +255,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, int gain_num_short_square; /* Compute numerator of pseudo-normalized correlation R'(k). */ - sum = dsp->scalarproduct_int16(&delayed_signal[k][i], + sum = adsp->scalarproduct_int16(&delayed_signal[k][i], sig_scaled + RES_PREV_DATA_SIZE, subframe_size); gain_num_short = FFMAX(sum >> sh_gain_num, 0); @@ -312,7 +312,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, LONG_INT_FILT_LEN, subframe_size + 1); /* Compute R'(k) correlation's numerator. */ - sum = dsp->scalarproduct_int16(residual_filt, + sum = adsp->scalarproduct_int16(residual_filt, sig_scaled + RES_PREV_DATA_SIZE, subframe_size); @@ -327,7 +327,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, } /* Compute R'(k) correlation's denominator. */ - sum = dsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size); + sum = adsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size); tmp = FFMAX(av_log2(sum) - 14, 0); sum >>= tmp; @@ -421,7 +421,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, * * \note All members of lp_gn, except 10-19 must be equal to zero. */ -static int16_t get_tilt_comp(DSPContext *dsp, int16_t *lp_gn, +static int16_t get_tilt_comp(AudioDSPContext *adsp, int16_t *lp_gn, const int16_t *lp_gd, int16_t* speech, int subframe_size) { @@ -437,8 +437,8 @@ static int16_t get_tilt_comp(DSPContext *dsp, int16_t *lp_gn, /* Now lp_gn (starting with 10) contains impulse response of A(z/FORMANT_PP_FACTOR_NUM)/A(z/FORMANT_PP_FACTOR_DEN) filter. */ - rh0 = dsp->scalarproduct_int16(lp_gn + 10, lp_gn + 10, 20); - rh1 = dsp->scalarproduct_int16(lp_gn + 10, lp_gn + 11, 20); + rh0 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 10, 20); + rh1 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 11, 20); /* downscale to avoid overflow */ temp = av_log2(rh0) - 14; @@ -511,7 +511,7 @@ static int16_t apply_tilt_comp(int16_t* out, int16_t* res_pst, int refl_coeff, return tmp; } -void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, +void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing, const int16_t *lp_filter_coeffs, int pitch_delay_int, int16_t* residual, int16_t* res_filter_data, int16_t* pos_filter_data, int16_t *speech, int subframe_size) @@ -541,7 +541,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, /* long-term filter. If long-term prediction gain is larger than 3dB (returned value is nonzero) then declare current subframe as periodic. */ - *voicing = FFMAX(*voicing, long_term_filter(dsp, pitch_delay_int, + *voicing = FFMAX(*voicing, long_term_filter(adsp, pitch_delay_int, residual, residual_filt_buf + 10, subframe_size)); @@ -549,7 +549,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, memmove(residual, residual + subframe_size, RES_PREV_DATA_SIZE * sizeof(int16_t)); /* short-term filter tilt compensation */ - tilt_comp_coeff = get_tilt_comp(dsp, lp_gn, lp_gd, residual_filt_buf + 10, subframe_size); + tilt_comp_coeff = get_tilt_comp(adsp, lp_gn, lp_gd, residual_filt_buf + 10, subframe_size); /* Apply second half of short-term postfilter: 1/A(z/FORMANT_PP_FACTOR_DEN) */ ff_celp_lp_synthesis_filter(pos_filter_data + 10, lp_gd + 1, diff --git a/libavcodec/g729postfilter.h b/libavcodec/g729postfilter.h index 5239fc80dd..89e3e40cea 100644 --- a/libavcodec/g729postfilter.h +++ b/libavcodec/g729postfilter.h @@ -22,7 +22,7 @@ #define FFMPEG_G729POSTFILTER_H #include <stdint.h> -#include "dsputil.h" +#include "audiodsp.h" /** * tilt compensation factor (G.729, k1>0) @@ -94,7 +94,7 @@ * Short-term postfilter (4.2.2). * Tilt-compensation (4.2.3) */ -void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, +void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing, const int16_t *lp_filter_coeffs, int pitch_delay_int, int16_t* residual, int16_t* res_filter_data, int16_t* pos_filter_data, int16_t *speech, diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index bb52a8c4c0..ef3685ac0f 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -1,5 +1,6 @@ OBJS += ppc/fmtconvert_altivec.o \ +OBJS-$(CONFIG_AUDIODSP) += ppc/audiodsp.o OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o @@ -24,7 +25,6 @@ ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ ppc/fdct_altivec.o \ ppc/gmc_altivec.o \ ppc/idct_altivec.o \ - ppc/int_altivec.o \ FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o FFT-OBJS-$(HAVE_VSX) += ppc/fft_vsx.o diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/audiodsp.c index 50f55e2c9c..c88c3d9167 100644 --- a/libavcodec/ppc/int_altivec.c +++ b/libavcodec/ppc/audiodsp.c @@ -20,7 +20,7 @@ /** * @file - * miscellaneous integer operations + * miscellaneous audio operations */ #include "config.h" @@ -29,10 +29,13 @@ #endif #include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" #include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/util_altivec.h" -#include "libavcodec/dsputil.h" -#include "dsputil_altivec.h" +#include "libavcodec/audiodsp.h" + +#if HAVE_ALTIVEC static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, int order) @@ -56,7 +59,14 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, return ires; } -av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_audiodsp_init_ppc(AudioDSPContext *c) { +#if HAVE_ALTIVEC + if (!PPC_ALTIVEC(av_get_cpu_flags())) + return; + c->scalarproduct_int16 = scalarproduct_int16_altivec; +#endif /* HAVE_ALTIVEC */ } diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index 225d1b0d9c..a835024169 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -36,6 +36,5 @@ void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); -void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx); #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index ccd21aedf1..ebdf0a4b48 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -35,7 +35,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, int mm_flags = av_get_cpu_flags(); if (PPC_ALTIVEC(mm_flags)) { ff_dsputil_init_altivec(c, avctx, high_bit_depth); - ff_int_init_altivec(c, avctx); + c->gmc1 = ff_gmc1_altivec; if (!high_bit_depth) { diff --git a/libavcodec/ra144.c b/libavcodec/ra144.c index 992972182f..d5ad02f2d3 100644 --- a/libavcodec/ra144.c +++ b/libavcodec/ra144.c @@ -1681,9 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy) } /** inverse root mean square */ -int ff_irms(DSPContext *dsp, const int16_t *data) +int ff_irms(AudioDSPContext *adsp, const int16_t *data) { - unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE); + unsigned int sum = adsp->scalarproduct_int16(data, data, BLOCKSIZE); if (sum == 0) return 0; /* OOPS - division by zero */ @@ -1701,7 +1701,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, if (cba_idx) { cba_idx += BLOCKSIZE/2 - 1; ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx); - m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12; + m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * gval) >> 12; } else { m[0] = 0; } diff --git a/libavcodec/ra144.h b/libavcodec/ra144.h index c2ee59b2dc..c1ceb87341 100644 --- a/libavcodec/ra144.h +++ b/libavcodec/ra144.h @@ -25,7 +25,7 @@ #include <stdint.h> #include "lpc.h" #include "audio_frame_queue.h" -#include "dsputil.h" +#include "audiodsp.h" #define NBLOCKS 4 ///< number of subblocks within a block #define BLOCKSIZE 40 ///< subblock size in 16-bit words @@ -36,7 +36,7 @@ typedef struct RA144Context { AVCodecContext *avctx; - DSPContext dsp; + AudioDSPContext adsp; LPCContext lpc_ctx; AudioFrameQueue afq; int last_frame; @@ -72,7 +72,7 @@ unsigned int ff_rms(const int *data); int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold, int energy); unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy); -int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/); +int ff_irms(AudioDSPContext *adsp, const int16_t *data/*align 16*/); void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, int cba_idx, int cb1_idx, int cb2_idx, int gval, int gain); diff --git a/libavcodec/ra144dec.c b/libavcodec/ra144dec.c index ab7cc68306..29c78229bb 100644 --- a/libavcodec/ra144dec.c +++ b/libavcodec/ra144dec.c @@ -34,7 +34,7 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx) RA144Context *ractx = avctx->priv_data; ractx->avctx = avctx; - ff_dsputil_init(&ractx->dsp, avctx); + ff_audiodsp_init(&ractx->adsp); ractx->lpc_coef[0] = ractx->lpc_tables[0]; ractx->lpc_coef[1] = ractx->lpc_tables[1]; diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c index 1f4e5bae45..499c41a038 100644 --- a/libavcodec/ra144enc.c +++ b/libavcodec/ra144enc.c @@ -61,7 +61,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx) ractx->lpc_coef[0] = ractx->lpc_tables[0]; ractx->lpc_coef[1] = ractx->lpc_tables[1]; ractx->avctx = avctx; - ff_dsputil_init(&ractx->dsp, avctx); + ff_audiodsp_init(&ractx->adsp); ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER, FF_LPC_TYPE_LEVINSON); if (ret < 0) @@ -374,7 +374,7 @@ static void ra144_encode_subblock(RA144Context *ractx, memcpy(cba, work + LPC_ORDER, sizeof(cba)); ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1); - m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * rms) >> 12; + m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * rms) >> 12; } fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx); for (i = 0; i < BLOCKSIZE; i++) { diff --git a/libavcodec/takdec.c b/libavcodec/takdec.c index d76946f6ef..9bfbfcc3df 100644 --- a/libavcodec/takdec.c +++ b/libavcodec/takdec.c @@ -28,9 +28,9 @@ #include "libavutil/internal.h" #include "libavutil/samplefmt.h" #include "tak.h" +#include "audiodsp.h" #include "thread.h" #include "avcodec.h" -#include "dsputil.h" #include "internal.h" #include "unary.h" @@ -46,7 +46,7 @@ typedef struct MCDParam { typedef struct TAKDecContext { AVCodecContext *avctx; ///< parent AVCodecContext - DSPContext dsp; + AudioDSPContext adsp; TAKStreamInfo ti; GetBitContext gb; ///< bitstream reader initialized to start at the current frame @@ -171,7 +171,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx) { TAKDecContext *s = avctx->priv_data; - ff_dsputil_init(&s->dsp, avctx); + ff_audiodsp_init(&s->adsp); s->avctx = avctx; avctx->bits_per_raw_sample = avctx->bits_per_coded_sample; @@ -469,8 +469,8 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded, int v = 1 << (filter_quant - 1); if (filter_order & -16) - v += s->dsp.scalarproduct_int16(&s->residues[i], s->filter, - filter_order & -16); + v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter, + filter_order & -16); for (j = filter_order & -16; j < filter_order; j += 4) { v += s->residues[i + j + 3] * s->filter[j + 3] + s->residues[i + j + 2] * s->filter[j + 2] + @@ -640,8 +640,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length) int v = 1 << 9; if (filter_order == 16) { - v += s->dsp.scalarproduct_int16(&s->residues[i], s->filter, - filter_order); + v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter, + filter_order); } else { v += s->residues[i + 7] * s->filter[7] + s->residues[i + 6] * s->filter[6] + diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index fa03f7ce26..a08c525e30 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -2,6 +2,7 @@ OBJS += x86/constants.o \ x86/fmtconvert_init.o \ OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o +OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o OBJS-$(CONFIG_DCT) += x86/dct_init.o OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o @@ -69,6 +70,7 @@ YASM-OBJS += x86/deinterlace.o \ x86/fmtconvert.o \ YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o +YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o YASM-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\ diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm new file mode 100644 index 0000000000..83f9bb6f45 --- /dev/null +++ b/libavcodec/x86/audiodsp.asm @@ -0,0 +1,133 @@ +;****************************************************************************** +;* optimized audio functions +;* Copyright (c) 2008 Loren Merritt +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_TEXT + +%macro SCALARPRODUCT 0 +; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order) +cglobal scalarproduct_int16, 3,3,3, v1, v2, order + shl orderq, 1 + add v1q, orderq + add v2q, orderq + neg orderq + pxor m2, m2 +.loop: + movu m0, [v1q + orderq] + movu m1, [v1q + orderq + mmsize] + pmaddwd m0, [v2q + orderq] + pmaddwd m1, [v2q + orderq + mmsize] + paddd m2, m0 + paddd m2, m1 + add orderq, mmsize*2 + jl .loop + HADDD m2, m0 + movd eax, m2 +%if mmsize == 8 + emms +%endif + RET +%endmacro + +INIT_MMX mmxext +SCALARPRODUCT +INIT_XMM sse2 +SCALARPRODUCT + + +;----------------------------------------------------------------------------- +; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, +; int32_t max, unsigned int len) +;----------------------------------------------------------------------------- + +; %1 = number of xmm registers used +; %2 = number of inline load/process/store loops per asm loop +; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop +; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) +; %5 = suffix +%macro VECTOR_CLIP_INT32 4-5 +cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len +%if %4 + cvtsi2ss m4, minm + cvtsi2ss m5, maxm +%else + movd m4, minm + movd m5, maxm +%endif + SPLATD m4 + SPLATD m5 +.loop: +%assign %%i 0 +%rep %2 + mova m0, [srcq+mmsize*(0+%%i)] + mova m1, [srcq+mmsize*(1+%%i)] + mova m2, [srcq+mmsize*(2+%%i)] + mova m3, [srcq+mmsize*(3+%%i)] +%if %3 + mova m7, [srcq+mmsize*(4+%%i)] + mova m8, [srcq+mmsize*(5+%%i)] + mova m9, [srcq+mmsize*(6+%%i)] + mova m10, [srcq+mmsize*(7+%%i)] +%endif + CLIPD m0, m4, m5, m6 + CLIPD m1, m4, m5, m6 + CLIPD m2, m4, m5, m6 + CLIPD m3, m4, m5, m6 +%if %3 + CLIPD m7, m4, m5, m6 + CLIPD m8, m4, m5, m6 + CLIPD m9, m4, m5, m6 + CLIPD m10, m4, m5, m6 +%endif + mova [dstq+mmsize*(0+%%i)], m0 + mova [dstq+mmsize*(1+%%i)], m1 + mova [dstq+mmsize*(2+%%i)], m2 + mova [dstq+mmsize*(3+%%i)], m3 +%if %3 + mova [dstq+mmsize*(4+%%i)], m7 + mova [dstq+mmsize*(5+%%i)], m8 + mova [dstq+mmsize*(6+%%i)], m9 + mova [dstq+mmsize*(7+%%i)], m10 +%endif +%assign %%i %%i+4*(%3+1) +%endrep + add srcq, mmsize*4*(%2+%3) + add dstq, mmsize*4*(%2+%3) + sub lend, mmsize*(%2+%3) + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +%define CLIPD CLIPD_MMX +VECTOR_CLIP_INT32 0, 1, 0, 0 +INIT_XMM sse2 +VECTOR_CLIP_INT32 6, 1, 0, 0, _int +%define CLIPD CLIPD_SSE2 +VECTOR_CLIP_INT32 6, 2, 0, 1 +INIT_XMM sse4 +%define CLIPD CLIPD_SSE41 +%ifdef m8 +VECTOR_CLIP_INT32 11, 1, 1, 0 +%else +VECTOR_CLIP_INT32 6, 1, 0, 0 +%endif diff --git a/libavcodec/x86/audiodsp.h b/libavcodec/x86/audiodsp.h new file mode 100644 index 0000000000..35f9f1485b --- /dev/null +++ b/libavcodec/x86/audiodsp.h @@ -0,0 +1,25 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_AUDIODSP_H +#define AVCODEC_X86_AUDIODSP_H + +void ff_vector_clipf_sse(float *dst, const float *src, + float min, float max, int len); + +#endif /* AVCODEC_X86_AUDIODSP_H */ diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c new file mode 100644 index 0000000000..d586bf6c04 --- /dev/null +++ b/libavcodec/x86/audiodsp_init.c @@ -0,0 +1,66 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/audiodsp.h" +#include "audiodsp.h" + +int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, + int order); +int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, + int order); + +void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); + +av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) + c->vector_clip_int32 = ff_vector_clip_int32_mmx; + + if (EXTERNAL_MMXEXT(cpu_flags)) + c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; + + if (EXTERNAL_SSE(cpu_flags)) + c->vector_clipf = ff_vector_clipf_sse; + + if (EXTERNAL_SSE2(cpu_flags)) { + c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; + if (cpu_flags & AV_CPU_FLAG_ATOM) + c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; + else + c->vector_clip_int32 = ff_vector_clip_int32_sse2; + } + + if (EXTERNAL_SSE4(cpu_flags)) + c->vector_clip_int32 = ff_vector_clip_int32_sse4; +} diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm index 3bb5d9cbfe..e261c0fcc7 100644 --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -30,115 +30,6 @@ cextern pb_80 SECTION_TEXT -%macro SCALARPRODUCT 0 -; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order) -cglobal scalarproduct_int16, 3,3,3, v1, v2, order - shl orderq, 1 - add v1q, orderq - add v2q, orderq - neg orderq - pxor m2, m2 -.loop: - movu m0, [v1q + orderq] - movu m1, [v1q + orderq + mmsize] - pmaddwd m0, [v2q + orderq] - pmaddwd m1, [v2q + orderq + mmsize] - paddd m2, m0 - paddd m2, m1 - add orderq, mmsize*2 - jl .loop - HADDD m2, m0 - movd eax, m2 -%if mmsize == 8 - emms -%endif - RET -%endmacro - -INIT_MMX mmxext -SCALARPRODUCT -INIT_XMM sse2 -SCALARPRODUCT - - -;----------------------------------------------------------------------------- -; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, -; int32_t max, unsigned int len) -;----------------------------------------------------------------------------- - -; %1 = number of xmm registers used -; %2 = number of inline load/process/store loops per asm loop -; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop -; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) -; %5 = suffix -%macro VECTOR_CLIP_INT32 4-5 -cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len -%if %4 - cvtsi2ss m4, minm - cvtsi2ss m5, maxm -%else - movd m4, minm - movd m5, maxm -%endif - SPLATD m4 - SPLATD m5 -.loop: -%assign %%i 0 -%rep %2 - mova m0, [srcq+mmsize*(0+%%i)] - mova m1, [srcq+mmsize*(1+%%i)] - mova m2, [srcq+mmsize*(2+%%i)] - mova m3, [srcq+mmsize*(3+%%i)] -%if %3 - mova m7, [srcq+mmsize*(4+%%i)] - mova m8, [srcq+mmsize*(5+%%i)] - mova m9, [srcq+mmsize*(6+%%i)] - mova m10, [srcq+mmsize*(7+%%i)] -%endif - CLIPD m0, m4, m5, m6 - CLIPD m1, m4, m5, m6 - CLIPD m2, m4, m5, m6 - CLIPD m3, m4, m5, m6 -%if %3 - CLIPD m7, m4, m5, m6 - CLIPD m8, m4, m5, m6 - CLIPD m9, m4, m5, m6 - CLIPD m10, m4, m5, m6 -%endif - mova [dstq+mmsize*(0+%%i)], m0 - mova [dstq+mmsize*(1+%%i)], m1 - mova [dstq+mmsize*(2+%%i)], m2 - mova [dstq+mmsize*(3+%%i)], m3 -%if %3 - mova [dstq+mmsize*(4+%%i)], m7 - mova [dstq+mmsize*(5+%%i)], m8 - mova [dstq+mmsize*(6+%%i)], m9 - mova [dstq+mmsize*(7+%%i)], m10 -%endif -%assign %%i %%i+4*(%3+1) -%endrep - add srcq, mmsize*4*(%2+%3) - add dstq, mmsize*4*(%2+%3) - sub lend, mmsize*(%2+%3) - jg .loop - REP_RET -%endmacro - -INIT_MMX mmx -%define CLIPD CLIPD_MMX -VECTOR_CLIP_INT32 0, 1, 0, 0 -INIT_XMM sse2 -VECTOR_CLIP_INT32 6, 1, 0, 0, _int -%define CLIPD CLIPD_SSE2 -VECTOR_CLIP_INT32 6, 2, 0, 1 -INIT_XMM sse4 -%define CLIPD CLIPD_SSE41 -%ifdef m8 -VECTOR_CLIP_INT32 11, 1, 1, 0 -%else -VECTOR_CLIP_INT32 6, 1, 0, 0 -%endif - ; %1 = aligned/unaligned %macro BSWAP_LOOPS 1 mov r3, r2 diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 5c12364c23..ed58598810 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -29,23 +29,9 @@ #include "dsputil_x86.h" #include "idct_xvid.h" -int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, - int order); -int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, - int order); - void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); -void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); - static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int cpu_flags, unsigned high_bit_depth) { @@ -81,7 +67,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_MMX_INLINE */ #if HAVE_MMX_EXTERNAL - c->vector_clip_int32 = ff_vector_clip_int32_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; #endif /* HAVE_MMX_EXTERNAL */ } @@ -96,19 +81,12 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, c->idct = ff_idct_xvid_mmxext; } #endif /* HAVE_MMXEXT_INLINE */ - -#if HAVE_MMXEXT_EXTERNAL - c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; -#endif /* HAVE_MMXEXT_EXTERNAL */ } static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int cpu_flags, unsigned high_bit_depth) { #if HAVE_YASM -#if HAVE_SSE_EXTERNAL - c->vector_clipf = ff_vector_clipf_sse; -#endif #if HAVE_INLINE_ASM && CONFIG_VIDEODSP c->gmc = ff_gmc_sse; #endif @@ -128,12 +106,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_SSE2_INLINE */ #if HAVE_SSE2_EXTERNAL - c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; - if (cpu_flags & AV_CPU_FLAG_ATOM) { - c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; - } else { - c->vector_clip_int32 = ff_vector_clip_int32_sse2; - } c->bswap_buf = ff_bswap32_buf_sse2; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; #endif /* HAVE_SSE2_EXTERNAL */ @@ -147,14 +119,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_SSSE3_EXTERNAL */ } -static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, - int cpu_flags, unsigned high_bit_depth) -{ -#if HAVE_SSE4_EXTERNAL - c->vector_clip_int32 = ff_vector_clip_int32_sse4; -#endif /* HAVE_SSE4_EXTERNAL */ -} - av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { @@ -175,9 +139,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, if (EXTERNAL_SSSE3(cpu_flags)) dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); - if (EXTERNAL_SSE4(cpu_flags)) - dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth); - if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 3f187b70b5..54aba38b53 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -28,7 +28,6 @@ #include "libavutil/x86/asm.h" #include "libavcodec/pixels.h" #include "libavcodec/videodsp.h" -#include "constants.h" #include "dsputil_x86.h" #include "inline_asm.h" diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index e723df1937..b5d7291f28 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -53,10 +53,6 @@ void ff_gmc_sse(uint8_t *dst, uint8_t *src, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); -void ff_vector_clipf_sse(float *dst, const float *src, - float min, float max, int len); - - void ff_mmx_idct(int16_t *block); void ff_mmxext_idct(int16_t *block); |