| author    | Ronald S. Bultje <rsbultje@gmail.com>                      | 2013-03-10 16:53:07 -0700 |
| committer | Michael Niedermayer <michaelni@gmx.at>                     | 2013-03-13 04:11:27 +0100 |
| commit    | 94b77678dcdf26cd91b20e8182f07af5f237ad27 (patch)           |                           |
| tree      | c26167390918627e8a7167a0a6914e9d5d172b77 /libavcodec/alpha |                           |
| parent    | 6802c701063ef5cc85a09f9282a72c1cc578f54c (diff)            |                           |
| download  | ffmpeg-94b77678dcdf26cd91b20e8182f07af5f237ad27.tar.gz     |                           |
Move alpha half-pel assembly from dsputil to hpeldsp.
Diffstat (limited to 'libavcodec/alpha')

| -rw-r--r-- | libavcodec/alpha/Makefile            | 2   |
| -rw-r--r-- | libavcodec/alpha/dsputil_alpha.c     | 187 |
| -rw-r--r-- | libavcodec/alpha/dsputil_alpha.h     | 2   |
| -rw-r--r-- | libavcodec/alpha/dsputil_alpha_asm.S | 92  |
| -rw-r--r-- | libavcodec/alpha/hpeldsp_alpha.c     | 212 |
| -rw-r--r-- | libavcodec/alpha/hpeldsp_alpha.h     | 27  |
| -rw-r--r-- | libavcodec/alpha/hpeldsp_alpha_asm.S | 135 |

7 files changed, 377 insertions, 280 deletions
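
The half-pel primitives this patch moves operate on eight pixels packed into one 64-bit word. As orientation before the diff, here is a small standalone C sketch (not part of the patch) of the rounding and non-rounding byte-wise averages that the moved avg2()/avg2_no_rnd() helpers compute; BYTE_VEC() is redefined locally for the example, while the real code takes it from the Alpha asm.h header.

```c
#include <stdint.h>
#include <stdio.h>

/* Byte-broadcast helper, defined locally for this sketch only; the patched
 * code gets an equivalent BYTE_VEC() from libavcodec/alpha/asm.h. */
#define BYTE_VEC(x) ((uint64_t)(x) * 0x0101010101010101ULL)

/* Rounding average: uses a + b = 2*(a|b) - (a^b), halving (a^b) per byte.
 * Masking with 0xfe clears bit 0 of each byte before the shift, so no
 * borrow crosses byte lanes; each byte becomes (a + b + 1) >> 1. */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

/* Non-rounding variant: uses a + b = 2*(a&b) + (a^b); each byte becomes
 * (a + b) >> 1, again without carries between bytes. */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

int main(void)
{
    uint64_t a = 0x00FF80017F604020ULL;
    uint64_t b = 0x01FE7F02805F3F21ULL;
    /* Each byte of the result is the per-byte average of a and b. */
    printf("rnd:    %016llx\n", (unsigned long long)avg2(a, b));
    printf("no_rnd: %016llx\n", (unsigned long long)avg2_no_rnd(a, b));
    return 0;
}
```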
```diff
diff --git a/libavcodec/alpha/Makefile b/libavcodec/alpha/Makefile
index e28200d45a..6f22137167 100644
--- a/libavcodec/alpha/Makefile
+++ b/libavcodec/alpha/Makefile
@@ -4,4 +4,6 @@ OBJS += alpha/dsputil_alpha.o \
                 alpha/motion_est_mvi_asm.o \
                 alpha/simple_idct_alpha.o \
 
+OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \
+                          alpha/hpeldsp_alpha_asm.o
 OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index cb62665a55..03ba0a8881 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -119,197 +119,12 @@ static void clear_blocks_axp(int16_t *blocks) {
     } while (n);
 }
 
-static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
-{
-    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-static inline uint64_t avg2(uint64_t a, uint64_t b)
-{
-    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-#if 0
-/* The XY2 routines basically utilize this scheme, but reuse parts in
-   each iteration. */
-static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
-{
-    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
-                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
-                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
-                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
-    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
-                    + (l2 & BYTE_VEC(0x03))
-                    + (l3 & BYTE_VEC(0x03))
-                    + (l4 & BYTE_VEC(0x03))
-                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
-    return r1 + r2;
-}
-#endif
-
-#define OP(LOAD, STORE) \
-    do { \
-        STORE(LOAD(pixels), block); \
-        pixels += line_size; \
-        block += line_size; \
-    } while (--h)
-
-#define OP_X2(LOAD, STORE) \
-    do { \
-        uint64_t pix1, pix2; \
- \
-        pix1 = LOAD(pixels); \
-        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
-        STORE(AVG2(pix1, pix2), block); \
-        pixels += line_size; \
-        block += line_size; \
-    } while (--h)
-
-#define OP_Y2(LOAD, STORE) \
-    do { \
-        uint64_t pix = LOAD(pixels); \
-        do { \
-            uint64_t next_pix; \
- \
-            pixels += line_size; \
-            next_pix = LOAD(pixels); \
-            STORE(AVG2(pix, next_pix), block); \
-            block += line_size; \
-            pix = next_pix; \
-        } while (--h); \
-    } while (0)
-
-#define OP_XY2(LOAD, STORE) \
-    do { \
-        uint64_t pix1 = LOAD(pixels); \
-        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
-        uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
-                       + (pix2 & BYTE_VEC(0x03)); \
-        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
-                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
- \
-        do { \
-            uint64_t npix1, npix2; \
-            uint64_t npix_l, npix_h; \
-            uint64_t avg; \
- \
-            pixels += line_size; \
-            npix1 = LOAD(pixels); \
-            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
-            npix_l = (npix1 & BYTE_VEC(0x03)) \
-                   + (npix2 & BYTE_VEC(0x03)); \
-            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
-                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
-            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
-                + pix_h + npix_h; \
-            STORE(avg, block); \
- \
-            block += line_size; \
-            pix_l = npix_l; \
-            pix_h = npix_h; \
-        } while (--h); \
-    } while (0)
-
-#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
-static void OPNAME ## _pixels ## SUFF ## _axp \
-        (uint8_t *restrict block, const uint8_t *restrict pixels, \
-         ptrdiff_t line_size, int h) \
-{ \
-    if ((size_t) pixels & 0x7) { \
-        OPKIND(uldq, STORE); \
-    } else { \
-        OPKIND(ldq, STORE); \
-    } \
-} \
- \
-static void OPNAME ## _pixels16 ## SUFF ## _axp \
-        (uint8_t *restrict block, const uint8_t *restrict pixels, \
-         ptrdiff_t line_size, int h) \
-{ \
-    OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
-    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
-}
-
-#define PIXOP(OPNAME, STORE) \
-    MAKE_OP(OPNAME, , OP, STORE) \
-    MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
-    MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
-    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
-
-/* Rounding primitives. */
-#define AVG2 avg2
-#define AVG4 avg4
-#define AVG4_ROUNDER BYTE_VEC(0x02)
-#define STORE(l, b) stq(l, b)
-PIXOP(put, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg, STORE);
-
-/* Not rounding primitives. */
-#undef AVG2
-#undef AVG4
-#undef AVG4_ROUNDER
-#undef STORE
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define AVG4_ROUNDER BYTE_VEC(0x01)
-#define STORE(l, b) stq(l, b)
-PIXOP(put_no_rnd, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg_no_rnd, STORE);
-
-static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
-                                 ptrdiff_t line_size, int h)
-{
-    put_pixels_axp_asm(block, pixels, line_size, h);
-    put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
-}
-
 av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     if (!high_bit_depth) {
-        c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
-        c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
-        c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
-        c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
-
-        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
-        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
-        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
-        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
-
-        c->avg_pixels_tab[0][0] = avg_pixels16_axp;
-        c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
-        c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
-        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
-
-        c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
-        c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
-        c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
-        c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
-
-        c->put_pixels_tab[1][0] = put_pixels_axp_asm;
-        c->put_pixels_tab[1][1] = put_pixels_x2_axp;
-        c->put_pixels_tab[1][2] = put_pixels_y2_axp;
-        c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
-
-        c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
-        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
-        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
-        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
-
-        c->avg_pixels_tab[1][0] = avg_pixels_axp;
-        c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
-        c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
-        c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
-
-        c->clear_blocks = clear_blocks_axp;
+        c->clear_blocks = clear_blocks_axp;
     }
 
     /* amask clears all bits that correspond to present features. */
diff --git a/libavcodec/alpha/dsputil_alpha.h b/libavcodec/alpha/dsputil_alpha.h
index cf5ca3b6bf..828cd167cc 100644
--- a/libavcodec/alpha/dsputil_alpha.h
+++ b/libavcodec/alpha/dsputil_alpha.h
@@ -26,8 +26,6 @@ void ff_simple_idct_axp(int16_t *block);
 void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);
 void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);
 
-void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
-                        ptrdiff_t line_size, int h);
 void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
                                 int line_size);
 void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
diff --git a/libavcodec/alpha/dsputil_alpha_asm.S b/libavcodec/alpha/dsputil_alpha_asm.S
index 557ba57a95..5c5f90a989 100644
--- a/libavcodec/alpha/dsputil_alpha_asm.S
+++ b/libavcodec/alpha/dsputil_alpha_asm.S
@@ -43,98 +43,6 @@
 .text
 
 /************************************************************************
- * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- *                         int line_size, int h)
- */
-        .align 6
-        .globl put_pixels_axp_asm
-        .ent put_pixels_axp_asm
-put_pixels_axp_asm:
-        .frame sp, 0, ra
-        .prologue 0
-
-        and a1, 7, t0
-        beq t0, $aligned
-
-        .align 4
-$unaligned:
-        ldq_u t0, 0(a1)
-        ldq_u t1, 8(a1)
-        addq a1, a2, a1
-        nop
-
-        ldq_u t2, 0(a1)
-        ldq_u t3, 8(a1)
-        addq a1, a2, a1
-        nop
-
-        ldq_u t4, 0(a1)
-        ldq_u t5, 8(a1)
-        addq a1, a2, a1
-        nop
-
-        ldq_u t6, 0(a1)
-        ldq_u t7, 8(a1)
-        extql t0, a1, t0
-        addq a1, a2, a1
-
-        extqh t1, a1, t1
-        addq a0, a2, t8
-        extql t2, a1, t2
-        addq t8, a2, t9
-
-        extqh t3, a1, t3
-        addq t9, a2, ta
-        extql t4, a1, t4
-        or t0, t1, t0
-
-        extqh t5, a1, t5
-        or t2, t3, t2
-        extql t6, a1, t6
-        or t4, t5, t4
-
-        extqh t7, a1, t7
-        or t6, t7, t6
-        stq t0, 0(a0)
-        stq t2, 0(t8)
-
-        stq t4, 0(t9)
-        subq a3, 4, a3
-        stq t6, 0(ta)
-        addq ta, a2, a0
-
-        bne a3, $unaligned
-        ret
-
-        .align 4
-$aligned:
-        ldq t0, 0(a1)
-        addq a1, a2, a1
-        ldq t1, 0(a1)
-        addq a1, a2, a1
-
-        ldq t2, 0(a1)
-        addq a1, a2, a1
-        ldq t3, 0(a1)
-
-        addq a0, a2, t4
-        addq a1, a2, a1
-        addq t4, a2, t5
-        subq a3, 4, a3
-
-        stq t0, 0(a0)
-        addq t5, a2, t6
-        stq t1, 0(t4)
-        addq t6, a2, a0
-
-        stq t2, 0(t5)
-        stq t3, 0(t6)
-
-        bne a3, $aligned
-        ret
-        .end put_pixels_axp_asm
-
-/************************************************************************
  * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
  *                                 int line_size)
  */
diff --git a/libavcodec/alpha/hpeldsp_alpha.c b/libavcodec/alpha/hpeldsp_alpha.c
new file mode 100644
index 0000000000..9a092f57de
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.c
@@ -0,0 +1,212 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/hpeldsp.h"
+#include "hpeldsp_alpha.h"
+#include "asm.h"
+
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
+{
+    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+#if 0
+/* The XY2 routines basically utilize this scheme, but reuse parts in
+   each iteration. */
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+{
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+    return r1 + r2;
+}
+#endif
+
+#define OP(LOAD, STORE) \
+    do { \
+        STORE(LOAD(pixels), block); \
+        pixels += line_size; \
+        block += line_size; \
+    } while (--h)
+
+#define OP_X2(LOAD, STORE) \
+    do { \
+        uint64_t pix1, pix2; \
+ \
+        pix1 = LOAD(pixels); \
+        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+        STORE(AVG2(pix1, pix2), block); \
+        pixels += line_size; \
+        block += line_size; \
+    } while (--h)
+
+#define OP_Y2(LOAD, STORE) \
+    do { \
+        uint64_t pix = LOAD(pixels); \
+        do { \
+            uint64_t next_pix; \
+ \
+            pixels += line_size; \
+            next_pix = LOAD(pixels); \
+            STORE(AVG2(pix, next_pix), block); \
+            block += line_size; \
+            pix = next_pix; \
+        } while (--h); \
+    } while (0)
+
+#define OP_XY2(LOAD, STORE) \
+    do { \
+        uint64_t pix1 = LOAD(pixels); \
+        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+        uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
+                       + (pix2 & BYTE_VEC(0x03)); \
+        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
+                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
+ \
+        do { \
+            uint64_t npix1, npix2; \
+            uint64_t npix_l, npix_h; \
+            uint64_t avg; \
+ \
+            pixels += line_size; \
+            npix1 = LOAD(pixels); \
+            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+            npix_l = (npix1 & BYTE_VEC(0x03)) \
+                   + (npix2 & BYTE_VEC(0x03)); \
+            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
+                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
+            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
+                + pix_h + npix_h; \
+            STORE(avg, block); \
+ \
+            block += line_size; \
+            pix_l = npix_l; \
+            pix_h = npix_h; \
+        } while (--h); \
+    } while (0)
+
+#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
+static void OPNAME ## _pixels ## SUFF ## _axp \
+        (uint8_t *restrict block, const uint8_t *restrict pixels, \
+         ptrdiff_t line_size, int h) \
+{ \
+    if ((size_t) pixels & 0x7) { \
+        OPKIND(uldq, STORE); \
+    } else { \
+        OPKIND(ldq, STORE); \
+    } \
+} \
+ \
+static void OPNAME ## _pixels16 ## SUFF ## _axp \
+        (uint8_t *restrict block, const uint8_t *restrict pixels, \
+         ptrdiff_t line_size, int h) \
+{ \
+    OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
+    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
+}
+
+#define PIXOP(OPNAME, STORE) \
+    MAKE_OP(OPNAME, , OP, STORE) \
+    MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
+    MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
+    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
+
+/* Rounding primitives. */
+#define AVG2 avg2
+#define AVG4 avg4
+#define AVG4_ROUNDER BYTE_VEC(0x02)
+#define STORE(l, b) stq(l, b)
+PIXOP(put, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg, STORE);
+
+/* Not rounding primitives. */
+#undef AVG2
+#undef AVG4
+#undef AVG4_ROUNDER
+#undef STORE
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd
+#define AVG4_ROUNDER BYTE_VEC(0x01)
+#define STORE(l, b) stq(l, b)
+PIXOP(put_no_rnd, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg_no_rnd, STORE);
+
+static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
+                                 ptrdiff_t line_size, int h)
+{
+    put_pixels_axp_asm(block, pixels, line_size, h);
+    put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
+}
+
+void ff_hpeldsp_init_alpha(HpelDSPContext* c, int flags)
+{
+    c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
+    c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
+    c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
+
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
+
+    c->avg_pixels_tab[0][0] = avg_pixels16_axp;
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
+
+    c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
+    c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
+    c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
+    c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
+
+    c->put_pixels_tab[1][0] = put_pixels_axp_asm;
+    c->put_pixels_tab[1][1] = put_pixels_x2_axp;
+    c->put_pixels_tab[1][2] = put_pixels_y2_axp;
+    c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
+
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
+
+    c->avg_pixels_tab[1][0] = avg_pixels_axp;
+    c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
+    c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
+    c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
+}
diff --git a/libavcodec/alpha/hpeldsp_alpha.h b/libavcodec/alpha/hpeldsp_alpha.h
new file mode 100644
index 0000000000..53e8604acd
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.h
@@ -0,0 +1,27 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H
+#define AVCODEC_ALPHA_HPELDSP_ALPHA_H
+
+#include <stdint.h>
+
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h);
+
+#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */
diff --git a/libavcodec/alpha/hpeldsp_alpha_asm.S b/libavcodec/alpha/hpeldsp_alpha_asm.S
new file mode 100644
index 0000000000..afc3d423eb
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha_asm.S
@@ -0,0 +1,135 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+
+/* Some nicer register names. */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ *                         int line_size, int h)
+ */
+        .align 6
+        .globl put_pixels_axp_asm
+        .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+        and a1, 7, t0
+        beq t0, $aligned
+
+        .align 4
+$unaligned:
+        ldq_u t0, 0(a1)
+        ldq_u t1, 8(a1)
+        addq a1, a2, a1
+        nop
+
+        ldq_u t2, 0(a1)
+        ldq_u t3, 8(a1)
+        addq a1, a2, a1
+        nop
+
+        ldq_u t4, 0(a1)
+        ldq_u t5, 8(a1)
+        addq a1, a2, a1
+        nop
+
+        ldq_u t6, 0(a1)
+        ldq_u t7, 8(a1)
+        extql t0, a1, t0
+        addq a1, a2, a1
+
+        extqh t1, a1, t1
+        addq a0, a2, t8
+        extql t2, a1, t2
+        addq t8, a2, t9
+
+        extqh t3, a1, t3
+        addq t9, a2, ta
+        extql t4, a1, t4
+        or t0, t1, t0
+
+        extqh t5, a1, t5
+        or t2, t3, t2
+        extql t6, a1, t6
+        or t4, t5, t4
+
+        extqh t7, a1, t7
+        or t6, t7, t6
+        stq t0, 0(a0)
+        stq t2, 0(t8)
+
+        stq t4, 0(t9)
+        subq a3, 4, a3
+        stq t6, 0(ta)
+        addq ta, a2, a0
+
+        bne a3, $unaligned
+        ret
+
+        .align 4
+$aligned:
+        ldq t0, 0(a1)
+        addq a1, a2, a1
+        ldq t1, 0(a1)
+        addq a1, a2, a1
+
+        ldq t2, 0(a1)
+        addq a1, a2, a1
+        ldq t3, 0(a1)
+
+        addq a0, a2, t4
+        addq a1, a2, a1
+        addq t4, a2, t5
+        subq a3, 4, a3
+
+        stq t0, 0(a0)
+        addq t5, a2, t6
+        stq t1, 0(t4)
+        addq t6, a2, a0
+
+        stq t2, 0(t5)
+        stq t3, 0(t6)
+
+        bne a3, $aligned
+        ret
+        .end put_pixels_axp_asm
```
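
For context on what the new ff_hpeldsp_init_alpha() fills in: put_pixels_tab[0][*] holds the 16-pixel-wide variants and put_pixels_tab[1][*] the 8-pixel-wide ones, indexed by half-pel position (0 = plain copy, 1 = horizontal x2, 2 = vertical y2, 3 = both xy2, matching the function suffixes). Below is a hedged sketch of how a motion-compensation caller would typically select an entry; the ExampleHpelCtx type and copy_block8_hpel() helper are illustrative stand-ins for this note, not the real FFmpeg HpelDSPContext API.

```c
#include <stdint.h>
#include <stddef.h>

/* Illustrative stand-in for the half-pel function table that
 * ff_hpeldsp_init_alpha() populates in this patch. */
typedef void (*op_pixels_func)(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);

typedef struct ExampleHpelCtx {
    /* First index: 0 = 16-pixel-wide ops, 1 = 8-pixel-wide ops.
     * Second index: 0 = copy, 1 = x half-pel, 2 = y half-pel, 3 = x+y half-pel. */
    op_pixels_func put_pixels_tab[2][4];
} ExampleHpelCtx;

/* Hypothetical caller: derive the table index from the low (half-pel) bits of
 * the motion vector and point the source at the full-pel position. */
void copy_block8_hpel(ExampleHpelCtx *c, uint8_t *dst, const uint8_t *src,
                      ptrdiff_t stride, int mx, int my, int h)
{
    int dxy = (mx & 1) | ((my & 1) << 1);           /* 0..3 */
    src += (mx >> 1) + (my >> 1) * stride;          /* integer-pel offset */
    c->put_pixels_tab[1][dxy](dst, src, stride, h); /* [1] = 8-pixel-wide op */
}
```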