diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-11-01 21:41:01 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-11-01 22:01:11 +0100 |
commit | 754539a4095a40b111c40c169ba079c3e0018e74 (patch) | |
tree | f1588e7acfc3e16205eedd42797079a3bcdbdc51 | |
parent | 0dc22e92f464283c82f0b0b9dd2d8a2d3fd1674f (diff) | |
download | ffmpeg-754539a4095a40b111c40c169ba079c3e0018e74.tar.gz |
dirac: Fix mmx/sse haar wavelet compose
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/dwt.c | 53 |
-rw-r--r-- | libavcodec/x86/dwt_yasm.asm | 20 |
2 files changed, 37 insertions, 36 deletions
diff --git a/libavcodec/x86/dwt.c b/libavcodec/x86/dwt.c index cc0a71186e..1d04c7dcc9 100644 --- a/libavcodec/x86/dwt.c +++ b/libavcodec/x86/dwt.c @@ -30,6 +30,8 @@ void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \ void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \ void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \ +void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\ +void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\ \ static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \ { \ @@ -83,6 +85,28 @@ static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \ \ ff_vertical_compose_haar##ext(b0, b1, width_align); \ } \ +static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\ +{\ + int w2= w>>1;\ + int x= w2 - (w2&(align-1));\ + ff_horizontal_compose_haar0i##ext(b, tmp, w);\ +\ + for (; x < w2; x++) {\ + b[2*x ] = tmp[x];\ + b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\ + }\ +}\ +static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\ +{\ + int w2= w>>1;\ + int x= w2 - (w2&(align-1));\ + ff_horizontal_compose_haar1i##ext(b, tmp, w);\ +\ + for (; x < w2; x++) {\ + b[2*x ] = (tmp[x] + 1)>>1;\ + b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\ + }\ +}\ \ #if HAVE_YASM @@ -95,11 +119,6 @@ COMPOSE_VERTICAL(_sse2, 8) void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w); -void ff_horizontal_compose_haar0i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w); -void ff_horizontal_compose_haar1i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w); -void ff_horizontal_compose_haar0i_sse2(IDWTELEM *b, IDWTELEM *tmp, int w); -void ff_horizontal_compose_haar1i_sse2(IDWTELEM *b, IDWTELEM 
*tmp, int w); - void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x) { for (; x < w2; x++) { @@ -108,22 +127,6 @@ void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x } } -void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x) -{ - for (; x < w2; x++) { - b[2*x ] = tmp[x]; - b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]); - } -} - -void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x) -{ - for (; x < w2; x++) { - b[2*x ] = (tmp[x] + 1)>>1; - b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1; - } -} - void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type) { #if HAVE_YASM @@ -148,11 +151,11 @@ void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type) break; case DWT_DIRAC_HAAR0: d->vertical_compose = vertical_compose_haar_mmx; - d->horizontal_compose = ff_horizontal_compose_haar0i_mmx; + d->horizontal_compose = horizontal_compose_haar0i_mmx; break; case DWT_DIRAC_HAAR1: d->vertical_compose = vertical_compose_haar_mmx; - d->horizontal_compose = ff_horizontal_compose_haar1i_mmx; + d->horizontal_compose = horizontal_compose_haar1i_mmx; break; } #endif @@ -175,11 +178,11 @@ void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type) break; case DWT_DIRAC_HAAR0: d->vertical_compose = vertical_compose_haar_sse2; -//MMXDISABLED d->horizontal_compose = ff_horizontal_compose_haar0i_sse2; + d->horizontal_compose = horizontal_compose_haar0i_sse2; break; case DWT_DIRAC_HAAR1: d->vertical_compose = vertical_compose_haar_sse2; - d->horizontal_compose = ff_horizontal_compose_haar1i_sse2; + d->horizontal_compose = horizontal_compose_haar1i_sse2; break; } diff --git a/libavcodec/x86/dwt_yasm.asm b/libavcodec/x86/dwt_yasm.asm index b008906278..7d7471c6e2 100644 --- a/libavcodec/x86/dwt_yasm.asm +++ b/libavcodec/x86/dwt_yasm.asm @@ -22,8 +22,6 @@ %include "x86inc.asm" cextern horizontal_compose_dd97i_end_c -cextern horizontal_compose_haar0i_end_c 
-cextern horizontal_compose_haar1i_end_c SECTION_RODATA pw_1: times 8 dw 1 @@ -188,7 +186,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width ; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width) cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2 mov w2d, wd - xor xd, xd + xor xq, xq shr w2d, 1 lea b_w2q, [bq+wq] mova m3, [pw_1] @@ -199,13 +197,13 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2 psraw m1, 1 psubw m0, m1 mova [tmpq + 2*xq], m0 - add xd, mmsize/2 - cmp xd, w2d + add xq, mmsize/2 + cmp xq, w2q jl .lowpass_loop - xor xd, xd - and w2d, ~(mmsize/2 - 1) - cmp w2d, mmsize/2 + xor xq, xq + and w2q, ~(mmsize/2 - 1) + cmp w2q, mmsize/2 jl .end .highpass_loop: @@ -226,11 +224,11 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2 mova [bq+4*xq], m0 mova [bq+4*xq+mmsize], m2 - add xd, mmsize/2 - cmp xd, w2d + add xq, mmsize/2 + cmp xq, w2q jl .highpass_loop .end: - END_HORIZONTAL horizontal_compose_haar%2i_end_c + REP_RET %endmacro |