aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2014-05-26 20:38:18 -0400
committer Michael Niedermayer <michaelni@gmx.at> 2014-06-02 00:47:54 +0200
commit 9b53853756f7e7535b4178054757fc2cb90408f3 (patch)
tree a4ad715e73bfd1e74c0da827c49bc3d8c0e6ea33
parent e91f27cbbb276bbdc9f7bc18e69e9b5fe6d16387 (diff)
download ffmpeg-9b53853756f7e7535b4178054757fc2cb90408f3.tar.gz
Rewrite main resampling loop (common and linear).
This removes a branch at a performance-sensitive point (in the middle of the loop). In fate-swr-resample-s32p-8000-2626, this makes the code about 10% faster. It also simplifies the loops, allowing us to rewrite them in yasm at some later point. The compensation_distance != 0 and index < 0 code paths are still kind of hairy. For compensation_distance != 0, this should likely be handled in the caller, so that it calls swri_resample twice (once until the dst_incr switch-point, and once with the remainder of the samples). For index < 0, the code should probably be rewritten to break out of the loop once sample_index >= 0, and then resume (e.g. as a tail-call) in the common or linear resampling loops. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libswresample/resample_template.c 64
1 files changed, 48 insertions, 16 deletions
diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c
index becff1265a..3fc83158bc 100644
--- a/libswresample/resample_template.c
+++ b/libswresample/resample_template.c
@@ -134,37 +134,69 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
av_assert2(index >= 0);
*consumed= index;
index = 0;
- }else if(compensation_distance == 0 && !c->linear && index >= 0){
- int sample_index = 0;
- for(dst_index=0; dst_index < dst_size; dst_index++){
- FELEM *filter;
- sample_index += index >> c->phase_shift;
+ } else if (compensation_distance == 0 && index >= 0) {
+ int64_t end_index = (1 + src_size - c->filter_length) << c->phase_shift;
+ int64_t delta_frac = (end_index - index) * c->src_incr - c->frac;
+ int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr;
+ int n = FFMIN(dst_size, delta_n);
+ int sample_index;
+
+ if (!c->linear) {
+ sample_index = index >> c->phase_shift;
index &= c->phase_mask;
- filter= ((FELEM*)c->filter_bank) + c->filter_alloc*index;
+ for (dst_index = 0; dst_index < n; dst_index++) {
+ FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
- if(sample_index + c->filter_length > src_size){
- break;
- }else{
#ifdef COMMON_CORE
COMMON_CORE
#else
FELEM2 val=0;
- for(i=0; i<c->filter_length; i++){
+ for (i = 0; i < c->filter_length; i++) {
val += src[sample_index + i] * (FELEM2)filter[i];
}
OUT(dst[dst_index], val);
#endif
+
+ frac += dst_incr_frac;
+ index += dst_incr;
+ if (frac >= c->src_incr) {
+ frac -= c->src_incr;
+ index++;
+ }
+ sample_index += index >> c->phase_shift;
+ index &= c->phase_mask;
}
+ } else {
+ sample_index = index >> c->phase_shift;
+ index &= c->phase_mask;
+ for (dst_index = 0; dst_index < n; dst_index++) {
+ FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
+ FELEM2 val=0, v2 = 0;
- frac += dst_incr_frac;
- index += dst_incr;
- if(frac >= c->src_incr){
- frac -= c->src_incr;
- index++;
+#ifdef LINEAR_CORE
+ LINEAR_CORE
+#else
+ for (i = 0; i < c->filter_length; i++) {
+ val += src[sample_index + i] * (FELEM2)filter[i];
+ v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
+ }
+#endif
+ val += (v2 - val) * (FELEML) frac / c->src_incr;
+ OUT(dst[dst_index], val);
+
+ frac += dst_incr_frac;
+ index += dst_incr;
+ if (frac >= c->src_incr) {
+ frac -= c->src_incr;
+ index++;
+ }
+ sample_index += index >> c->phase_shift;
+ index &= c->phase_mask;
}
}
+
*consumed = sample_index;
- }else{
+ } else {
int sample_index = 0;
for(dst_index=0; dst_index < dst_size; dst_index++){
FELEM *filter;