diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2014-02-08 19:32:18 -0500 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-02-09 18:07:15 +0100 |
commit | 0d4d223353b746b2e898a137322acc4bfeaab7d7 (patch) | |
tree | d4b74859a1b86829857aaffcabe84cf05a9aaf35 /libavcodec/vp9dsp.c | |
parent | 37c6eac296ba831a4cb5d238200df9237fb3506e (diff) | |
download | ffmpeg-0d4d223353b746b2e898a137322acc4bfeaab7d7.tar.gz |
vp9: invert order in l[] intra prediction array.
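The intra predictors either have no preferred order for the left-edge
array l[] (dc, h, dc_left, tm), or they prefer inverted order (vr, dr, hd).
Storing l[] inverted (bottom row first, so l[0] is the bottom-most left
pixel) therefore allows more efficient SIMD implementations.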
Diffstat (limited to 'libavcodec/vp9dsp.c')
-rw-r--r-- | libavcodec/vp9dsp.c | 70 |
1 files changed, 35 insertions, 35 deletions
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c index ec0b411c93..e2f99f72c8 100644 --- a/libavcodec/vp9dsp.c +++ b/libavcodec/vp9dsp.c @@ -84,10 +84,10 @@ static void vert_32x32_c(uint8_t *dst, ptrdiff_t stride, static void hor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top) { - AV_WN32A(dst + stride * 0, left[0] * 0x01010101U); - AV_WN32A(dst + stride * 1, left[1] * 0x01010101U); - AV_WN32A(dst + stride * 2, left[2] * 0x01010101U); - AV_WN32A(dst + stride * 3, left[3] * 0x01010101U); + AV_WN32A(dst + stride * 0, left[3] * 0x01010101U); + AV_WN32A(dst + stride * 1, left[2] * 0x01010101U); + AV_WN32A(dst + stride * 2, left[1] * 0x01010101U); + AV_WN32A(dst + stride * 3, left[0] * 0x01010101U); } static void hor_8x8_c(uint8_t *dst, ptrdiff_t stride, @@ -96,7 +96,7 @@ static void hor_8x8_c(uint8_t *dst, ptrdiff_t stride, int y; for (y = 0; y < 8; y++) { - AV_WN64A(dst, left[y] * 0x0101010101010101ULL); + AV_WN64A(dst, left[7 - y] * 0x0101010101010101ULL); dst += stride; } } @@ -107,7 +107,7 @@ static void hor_16x16_c(uint8_t *dst, ptrdiff_t stride, int y; for (y = 0; y < 16; y++) { - uint64_t p8 = left[y] * 0x0101010101010101ULL; + uint64_t p8 = left[15 - y] * 0x0101010101010101ULL; AV_WN64A(dst + 0, p8); AV_WN64A(dst + 8, p8); @@ -121,7 +121,7 @@ static void hor_32x32_c(uint8_t *dst, ptrdiff_t stride, int y; for (y = 0; y < 32; y++) { - uint64_t p8 = left[y] * 0x0101010101010101ULL; + uint64_t p8 = left[31 - y] * 0x0101010101010101ULL; AV_WN64A(dst + 0, p8); AV_WN64A(dst + 8, p8); @@ -137,7 +137,7 @@ static void tm_4x4_c(uint8_t *dst, ptrdiff_t stride, int y, tl = top[-1]; for (y = 0; y < 4; y++) { - int l_m_tl = left[y] - tl; + int l_m_tl = left[3 - y] - tl; dst[0] = av_clip_uint8(top[0] + l_m_tl); dst[1] = av_clip_uint8(top[1] + l_m_tl); @@ -153,7 +153,7 @@ static void tm_8x8_c(uint8_t *dst, ptrdiff_t stride, int y, tl = top[-1]; for (y = 0; y < 8; y++) { - int l_m_tl = left[y] - tl; + int l_m_tl = left[7 - y] - tl; dst[0] = 
av_clip_uint8(top[0] + l_m_tl); dst[1] = av_clip_uint8(top[1] + l_m_tl); @@ -173,7 +173,7 @@ static void tm_16x16_c(uint8_t *dst, ptrdiff_t stride, int y, tl = top[-1]; for (y = 0; y < 16; y++) { - int l_m_tl = left[y] - tl; + int l_m_tl = left[15 - y] - tl; dst[ 0] = av_clip_uint8(top[ 0] + l_m_tl); dst[ 1] = av_clip_uint8(top[ 1] + l_m_tl); @@ -201,7 +201,7 @@ static void tm_32x32_c(uint8_t *dst, ptrdiff_t stride, int y, tl = top[-1]; for (y = 0; y < 32; y++) { - int l_m_tl = left[y] - tl; + int l_m_tl = left[31 - y] - tl; dst[ 0] = av_clip_uint8(top[ 0] + l_m_tl); dst[ 1] = av_clip_uint8(top[ 1] + l_m_tl); @@ -613,7 +613,7 @@ static void diag_downright_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top) { int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], - l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3]; + l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0]; DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2; DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2; @@ -632,11 +632,11 @@ static void diag_downright_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \ uint8_t v[size + size - 1]; \ \ for (i = 0; i < size - 2; i++) { \ - v[i ] = (left[size - 1 - i] + left[size - 2 - i] * 2 + left[size - 3 - i] + 2) >> 2; \ - v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ + v[i ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ + v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ } \ - v[size - 2] = (left[1] + left[0] * 2 + top[-1] + 2) >> 2; \ - v[size - 1] = (left[0] + top[-1] * 2 + top[ 0] + 2) >> 2; \ + v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \ + v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \ v[size ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \ \ for (j = 0; j < size; j++) \ @@ -651,7 +651,7 @@ static void vert_right_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top) { int tl = top[-1], a0 = 
top[0], a1 = top[1], a2 = top[2], a3 = top[3], - l0 = left[0], l1 = left[1], l2 = left[2]; + l0 = left[3], l1 = left[2], l2 = left[1]; DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2; DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2; @@ -673,14 +673,14 @@ static void vert_right_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \ uint8_t ve[size + size/2 - 1], vo[size + size/2 - 1]; \ \ for (i = 0; i < size/2 - 2; i++) { \ - vo[i] = (left[size - 4 - i*2] + left[size - 3 - i*2] * 2 + left[size - 2 - i*2] + 2) >> 2; \ - ve[i] = (left[size - 5 - i*2] + left[size - 4 - i*2] * 2 + left[size - 3 - i*2] + 2) >> 2; \ + vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \ + ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \ } \ - vo[size/2 - 2] = (left[0] + left[1] * 2 + left[2] + 2) >> 2; \ - ve[size/2 - 2] = (top[-1] + left[0] * 2 + left[1] + 2) >> 2; \ + vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \ + ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ \ ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \ - vo[size/2 - 1] = (left[0] + top[-1] * 2 + top[0] + 2) >> 2; \ + vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \ for (i = 0; i < size - 1; i++) { \ ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \ vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ @@ -699,7 +699,7 @@ def_vert_right(32) static void hor_down_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top) { - int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3], + int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0], tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2]; DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2; @@ -722,14 +722,14 @@ static void hor_down_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \ uint8_t v[size * 3 - 2]; \ \ for (i = 0; i < size - 2; i++) { \ - v[i*2 ] = (left[size - 2 - i] + left[size - 1 - i] + 1) >> 1; \ - v[i*2 + 1] = (left[size - 3 
- i] + left[size - 2 - i] * 2 + left[size - 1 - i] + 2) >> 2; \ + v[i*2 ] = (left[i + 1] + left[i + 0] + 1) >> 1; \ + v[i*2 + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \ v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ } \ - v[size*2 - 2] = (top[-1] + left[0] + 1) >> 1; \ - v[size*2 - 4] = (left[0] + left[1] + 1) >> 1; \ - v[size*2 - 1] = (top[0] + top[-1] * 2 + left[0] + 2) >> 2; \ - v[size*2 - 3] = (top[-1] + left[0] * 2 + left[1] + 2) >> 2; \ + v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \ + v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \ + v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \ + v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ \ for (j = 0; j < size; j++) \ memcpy(dst + j*stride, v + size*2 - 2 - j*2, size); \ @@ -786,7 +786,7 @@ def_vert_left(32) static void hor_up_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top) { - int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3]; + int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0]; DST(0,0) = (l0 + l1 + 1) >> 1; DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2; @@ -805,17 +805,17 @@ static void hor_up_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \ uint8_t v[size*2 - 2]; \ \ for (i = 0; i < size - 2; i++) { \ - v[i*2 ] = (left[i] + left[i + 1] + 1) >> 1; \ - v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ + v[i*2 ] = (left[size - i - 1] + left[size - i - 2] + 1) >> 1; \ + v[i*2 + 1] = (left[size - i - 1] + left[size - i - 2] * 2 + left[size - i - 3] + 2) >> 2; \ } \ - v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \ - v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \ + v[size*2 - 4] = (left[1] + left[0] + 1) >> 1; \ + v[size*2 - 3] = (left[1] + left[0] * 3 + 2) >> 2; \ \ for (j = 0; j < size / 2; j++) \ memcpy(dst + j*stride, v + j*2, size); \ for (j = size / 2; j < size; j++) { \ memcpy(dst + j*stride, v + j*2, 
size*2 - 2 - j*2); \ - memset(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \ + memset(dst + j*stride + size*2 - 2 - j*2, left[0], \ 2 + j*2 - size); \ } \ } |