diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2014-05-26 20:38:18 -0400 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-06-02 00:47:54 +0200 |
commit | 9b53853756f7e7535b4178054757fc2cb90408f3 (patch) | |
tree | a4ad715e73bfd1e74c0da827c49bc3d8c0e6ea33 /libswresample/resample_template.c | |
parent | e91f27cbbb276bbdc9f7bc18e69e9b5fe6d16387 (diff) | |
download | ffmpeg-9b53853756f7e7535b4178054757fc2cb90408f3.tar.gz |
Rewrite main resampling loop (common and linear).
This removes a branch at a performance-sensitive point (in the middle
of the loop). In fate-swr-resample-s32p-8000-2626, this makes the code
about 10% faster. It also simplifies the loops, allowing us to rewrite
them in yasm at some later point.
The compensation_distance != 0 code and index < 0 code are still kind
of hairy. For compensation_distance != 0, this should likely be handled
in the caller, so that it calls swri_resample twice (once until the
dst_incr switch-point, and once with the remainder of the samples). For
index < 0, the code should probably be rewritten to break out of the
loop once sample_index >= 0, and then resume (e.g. as a tail-call) in
the common or linear resampling loops.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswresample/resample_template.c')
-rw-r--r-- | libswresample/resample_template.c | 64 |
1 files changed, 48 insertions, 16 deletions
diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c index becff1265a..3fc83158bc 100644 --- a/libswresample/resample_template.c +++ b/libswresample/resample_template.c @@ -134,37 +134,69 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int av_assert2(index >= 0); *consumed= index; index = 0; - }else if(compensation_distance == 0 && !c->linear && index >= 0){ - int sample_index = 0; - for(dst_index=0; dst_index < dst_size; dst_index++){ - FELEM *filter; - sample_index += index >> c->phase_shift; + } else if (compensation_distance == 0 && index >= 0) { + int64_t end_index = (1 + src_size - c->filter_length) << c->phase_shift; + int64_t delta_frac = (end_index - index) * c->src_incr - c->frac; + int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr; + int n = FFMIN(dst_size, delta_n); + int sample_index; + + if (!c->linear) { + sample_index = index >> c->phase_shift; index &= c->phase_mask; - filter= ((FELEM*)c->filter_bank) + c->filter_alloc*index; + for (dst_index = 0; dst_index < n; dst_index++) { + FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index; - if(sample_index + c->filter_length > src_size){ - break; - }else{ #ifdef COMMON_CORE COMMON_CORE #else FELEM2 val=0; - for(i=0; i<c->filter_length; i++){ + for (i = 0; i < c->filter_length; i++) { val += src[sample_index + i] * (FELEM2)filter[i]; } OUT(dst[dst_index], val); #endif + + frac += dst_incr_frac; + index += dst_incr; + if (frac >= c->src_incr) { + frac -= c->src_incr; + index++; + } + sample_index += index >> c->phase_shift; + index &= c->phase_mask; } + } else { + sample_index = index >> c->phase_shift; + index &= c->phase_mask; + for (dst_index = 0; dst_index < n; dst_index++) { + FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index; + FELEM2 val=0, v2 = 0; - frac += dst_incr_frac; - index += dst_incr; - if(frac >= c->src_incr){ - frac -= c->src_incr; - index++; +#ifdef LINEAR_CORE + LINEAR_CORE +#else + 
for (i = 0; i < c->filter_length; i++) { + val += src[sample_index + i] * (FELEM2)filter[i]; + v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc]; + } +#endif + val += (v2 - val) * (FELEML) frac / c->src_incr; + OUT(dst[dst_index], val); + + frac += dst_incr_frac; + index += dst_incr; + if (frac >= c->src_incr) { + frac -= c->src_incr; + index++; + } + sample_index += index >> c->phase_shift; + index &= c->phase_mask; } } + *consumed = sample_index; - }else{ + } else { int sample_index = 0; for(dst_index=0; dst_index < dst_size; dst_index++){ FELEM *filter; |