diff options
author | Måns Rullgård <mans@mansr.com> | 2009-01-18 18:31:52 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2009-01-18 18:31:52 +0000 |
commit | fd6045bae5bf097e33906eac03ac44f1946f85e1 (patch) | |
tree | 9b5657adafe4ad59794d832b377c6b3ae48161d6 | |
parent | 27a9466b21936b9580ad2271717e63c84d20bc81 (diff) | |
download | ffmpeg-fd6045bae5bf097e33906eac03ac44f1946f85e1.tar.gz |
Alpha: fix pix_abs16
Originally committed as revision 16675 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/alpha/dsputil_alpha.c | 12 | ||||
-rw-r--r-- | libavcodec/alpha/motion_est_mvi_asm.S | 105 |
2 files changed, 55 insertions, 62 deletions
```diff
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 74f5217a72..34e892c5a8 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -42,7 +42,7 @@ void get_pixels_mvi(DCTELEM *restrict block,
 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
                      int stride);
 int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
-int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
@@ -287,11 +287,6 @@ void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
     put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
 }
 
-static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
-{
-    return pix_abs16x16_mvi_asm(a, b, stride);
-}
-
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 {
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
@@ -343,10 +338,9 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 
         c->get_pixels = get_pixels_mvi;
         c->diff_pixels = diff_pixels_mvi;
-        c->sad[0] = sad16x16_mvi;
+        c->sad[0] = pix_abs16x16_mvi_asm;
         c->sad[1] = pix_abs8x8_mvi;
-// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed
-        c->pix_abs[0][0] = sad16x16_mvi;
+        c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
         c->pix_abs[1][0] = pix_abs8x8_mvi;
         c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
         c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
diff --git a/libavcodec/alpha/motion_est_mvi_asm.S b/libavcodec/alpha/motion_est_mvi_asm.S
index db1e30d424..2a08b07b0d 100644
--- a/libavcodec/alpha/motion_est_mvi_asm.S
+++ b/libavcodec/alpha/motion_est_mvi_asm.S
@@ -60,9 +60,8 @@ pix_abs16x16_mvi_asm:
         jsr     AT, (AT), _mcount
 #endif
 
-        and     a1, 7, t0
+        and     a2, 7, t0
         clr     v0
-        lda     a3, 16
         beq     t0, $aligned
         .align 4
 $unaligned:
@@ -86,80 +85,80 @@ $unaligned:
            td: error right */
 
         /* load line 0 */
-        ldq_u   t0, 0(a1)       # left_u
-        ldq_u   t1, 8(a1)       # mid
-        ldq_u   t2, 16(a1)      # right_u
-        ldq     t3, 0(a0)       # ref left
-        ldq     t4, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
-        addq    a1, a2, a1      # pix2
+        ldq_u   t0, 0(a2)       # left_u
+        ldq_u   t1, 8(a2)       # mid
+        ldq_u   t2, 16(a2)      # right_u
+        ldq     t3, 0(a1)       # ref left
+        ldq     t4, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
         /* load line 1 */
-        ldq_u   t5, 0(a1)       # left_u
-        ldq_u   t6, 8(a1)       # mid
-        ldq_u   t7, 16(a1)      # right_u
-        ldq     t8, 0(a0)       # ref left
-        ldq     t9, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
-        addq    a1, a2, a1      # pix2
+        ldq_u   t5, 0(a2)       # left_u
+        ldq_u   t6, 8(a2)       # mid
+        ldq_u   t7, 16(a2)      # right_u
+        ldq     t8, 0(a1)       # ref left
+        ldq     t9, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
         /* calc line 0 */
-        extql   t0, a1, t0      # left lo
-        extqh   t1, a1, ta      # left hi
-        extql   t1, a1, tb      # right lo
+        extql   t0, a2, t0      # left lo
+        extqh   t1, a2, ta      # left hi
+        extql   t1, a2, tb      # right lo
         or      t0, ta, t0      # left
-        extqh   t2, a1, t2      # right hi
+        extqh   t2, a2, t2      # right hi
         perr    t3, t0, tc      # error left
         or      t2, tb, t2      # right
         perr    t4, t2, td      # error right
         addq    v0, tc, v0      # add error left
         addq    v0, td, v0      # add error left
         /* calc line 1 */
-        extql   t5, a1, t5      # left lo
-        extqh   t6, a1, ta      # left hi
-        extql   t6, a1, tb      # right lo
+        extql   t5, a2, t5      # left lo
+        extqh   t6, a2, ta      # left hi
+        extql   t6, a2, tb      # right lo
         or      t5, ta, t5      # left
-        extqh   t7, a1, t7      # right hi
+        extqh   t7, a2, t7      # right hi
         perr    t8, t5, tc      # error left
         or      t7, tb, t7      # right
         perr    t9, t7, td      # error right
         addq    v0, tc, v0      # add error left
         addq    v0, td, v0      # add error left
         /* loop */
-        subq    a3, 2, a3       # h -= 2
-        bne     a3, $unaligned
+        subq    a4, 2, a4       # h -= 2
+        bne     a4, $unaligned
         ret
 
         .align 4
 $aligned:
         /* load line 0 */
-        ldq     t0, 0(a1)       # left
-        ldq     t1, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     t2, 0(a0)       # ref left
-        ldq     t3, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
+        ldq     t0, 0(a2)       # left
+        ldq     t1, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t2, 0(a1)       # ref left
+        ldq     t3, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
         /* load line 1 */
-        ldq     t4, 0(a1)       # left
-        ldq     t5, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     t6, 0(a0)       # ref left
-        ldq     t7, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
+        ldq     t4, 0(a2)       # left
+        ldq     t5, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t6, 0(a1)       # ref left
+        ldq     t7, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
         /* load line 2 */
-        ldq     t8, 0(a1)       # left
-        ldq     t9, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     ta, 0(a0)       # ref left
-        ldq     tb, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
+        ldq     t8, 0(a2)       # left
+        ldq     t9, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     ta, 0(a1)       # ref left
+        ldq     tb, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
         /* load line 3 */
-        ldq     tc, 0(a1)       # left
-        ldq     td, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     te, 0(a0)       # ref left
-        ldq     tf, 8(a0)       # ref right
+        ldq     tc, 0(a2)       # left
+        ldq     td, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     te, 0(a1)       # ref left
+        ldq     a0, 8(a1)       # ref right
         /* calc line 0 */
         perr    t0, t2, t0      # error left
-        addq    a0, a2, a0      # pix1
+        addq    a1, a3, a1      # pix1
         perr    t1, t3, t1      # error right
         addq    v0, t0, v0      # add error left
         /* calc line 1 */
@@ -175,11 +174,11 @@ $aligned:
         /* calc line 3 */
         perr    tc, te, t0      # error left
         addq    v0, t1, v0      # add error right
-        perr    td, tf, t1      # error right
+        perr    td, a0, t1      # error right
         addq    v0, t0, v0      # add error left
         addq    v0, t1, v0      # add error right
         /* loop */
-        subq    a3, 4, a3       # h -= 4
-        bne     a3, $aligned
+        subq    a4, 4, a4       # h -= 4
+        bne     a4, $aligned
         ret
         .end pix_abs16x16_mvi_asm
```