aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com> 2015-02-01 15:34:41 -0300
committer: James Almer <jamrial@gmail.com> 2015-02-01 20:23:03 -0300
commit: 6a6aeb538b4bb3a7afac35bf807dd34a391a21e8 (patch)
tree: 1220d0a6309597a2cd65d9fc121f78d99c3c8799
parent: bff7feb328d8d3fd234f920cb45e0ebdbdd7b407 (diff)
download: ffmpeg-6a6aeb538b4bb3a7afac35bf807dd34a391a21e8.tar.gz
hevc/sao: use aligned copies
For the band filter, source and destination are aligned (except for 16x16 CTBs); in the other cases, they are most often aligned.

Overall, the total width is also too small to amortize the cost of memcpy.

Timings (using an intrinsic version of the edge filters):

|        | B/32  | B/64  | E/32  | E/64   |
|--------|-------|-------|-------|--------|
| Before | 32045 | 93952 | 38925 | 126896 |
| After  | 26772 | 83803 | 33942 | 117182 |
-rw-r--r-- libavcodec/hevc_filter.c | 26
1 files changed, 18 insertions, 8 deletions
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index 9354c14300..7a0ec6d4be 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c
@@ -139,15 +139,25 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
return s->qp_y_tab[x + y * s->sps->min_cb_width];
}
-static void copy_CTB(uint8_t *dst, uint8_t *src,
- int width, int height, int stride_dst, int stride_src)
+static void copy_CTB(uint8_t *dst, uint8_t *src, int width, int height,
+ intptr_t stride_dst, intptr_t stride_src)
{
- int i;
-
- for (i = 0; i < height; i++) {
- memcpy(dst, src, width);
- dst += stride_dst;
- src += stride_src;
+int i, j;
+
+ if (((intptr_t)dst | (intptr_t)src | stride_dst | stride_src) & 15) {
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+=8)
+ AV_COPY64(dst+j, src+j);
+ dst += stride_dst;
+ src += stride_src;
+ }
+ } else {
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j+=16)
+ AV_COPY128(dst+j, src+j);
+ dst += stride_dst;
+ src += stride_src;
+ }
}
}