aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2011-01-26 22:37:50 -0500
committer: Michael Niedermayer <michaelni@gmx.at> 2011-01-30 03:41:00 +0100
commit: efcc10645525e7b5c2f3d7d46ecf6b5d267867f5 (patch)
tree: 67e4788dadc5ceed721f9aecffe731a670db0115
parent: 59884f55d1eaf1481771ee97b977000b8be1ddc7 (diff)
download: ffmpeg-efcc10645525e7b5c2f3d7d46ecf6b5d267867f5.tar.gz
Optimize C version of ff_emulated_edge_mc().
Reduces the cost from ~780 cycles to 551 cycles, mostly by using libc memcpy() instead of manually copying individual bytes.
(cherry picked from commit e5262ec44a30a9132f0361f775c5b63d20e4e4d5)
-rw-r--r-- libavcodec/dsputil.c 39
1 file changed, 23 insertions, 16 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 84f91fc135..fafe5f52d7 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -355,38 +355,45 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
start_x= FFMAX(0, -src_x);
end_y= FFMIN(block_h, h-src_y);
end_x= FFMIN(block_w, w-src_x);
+ assert(start_y < end_y && block_h);
+ assert(start_x < end_x && block_w);
- // copy existing part
- for(y=start_y; y<end_y; y++){
- for(x=start_x; x<end_x; x++){
- buf[x + y*linesize]= src[x + y*linesize];
- }
- }
+ w = end_x - start_x;
+ src += start_y*linesize + start_x;
+ buf += start_x;
//top
for(y=0; y<start_y; y++){
- for(x=start_x; x<end_x; x++){
- buf[x + y*linesize]= buf[x + start_y*linesize];
- }
+ memcpy(buf, src, w);
+ buf += linesize;
+ }
+
+ // copy existing part
+ for(; y<end_y; y++){
+ memcpy(buf, src, w);
+ src += linesize;
+ buf += linesize;
}
//bottom
- for(y=end_y; y<block_h; y++){
- for(x=start_x; x<end_x; x++){
- buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
- }
+ src -= linesize;
+ for(; y<block_h; y++){
+ memcpy(buf, src, w);
+ buf += linesize;
}
- for(y=0; y<block_h; y++){
+ buf -= block_h * linesize + start_x;
+ while (block_h--){
//left
for(x=0; x<start_x; x++){
- buf[x + y*linesize]= buf[start_x + y*linesize];
+ buf[x] = buf[start_x];
}
//right
for(x=end_x; x<block_w; x++){
- buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
+ buf[x] = buf[end_x - 1];
}
+ buf += linesize;
}
}