diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-04-22 18:47:16 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-04-22 18:59:23 +0200 |
commit | 7a556ebccfd6baf971d998127f9a1518b42830ae (patch) | |
tree | 14ae2456ae3e39588f1dfff4a6f4109ca35e152d | |
parent | 430d69c9421f2100e65b7e07c8d2a0f39534cbb6 (diff) | |
parent | 0c15a9aa7e1654a19144eb594f9639a57fd47482 (diff) | |
download | ffmpeg-7a556ebccfd6baf971d998127f9a1518b42830ae.tar.gz |
Merge commit '0c15a9aa7e1654a19144eb594f9639a57fd47482'
* commit '0c15a9aa7e1654a19144eb594f9639a57fd47482':
sh4: Remove dubious aligned dsputil code
Conflicts:
libavcodec/sh4/dsputil_align.c
libavcodec/sh4/h264chroma_init.c
libavcodec/sh4/hpeldsp.c
libavcodec/sh4/qpel.c
If someone wants to maintain the sh4 code in ffmpeg, wants to
add more optimizations, or volunteers to maintain any of
what is removed here and can confirm that it is faster,
then please contact us!
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/h264chroma.c | 2 | ||||
-rw-r--r-- | libavcodec/h264chroma.h | 1 | ||||
-rw-r--r-- | libavcodec/hpeldsp.c | 2 | ||||
-rw-r--r-- | libavcodec/hpeldsp.h | 1 | ||||
-rw-r--r-- | libavcodec/sh4/Makefile | 7 | ||||
-rw-r--r-- | libavcodec/sh4/dsputil_align.c | 298 | ||||
-rw-r--r-- | libavcodec/sh4/dsputil_sh4.c | 1 | ||||
-rw-r--r-- | libavcodec/sh4/dsputil_sh4.h | 10 | ||||
-rw-r--r-- | libavcodec/sh4/h264chroma_init.c | 132 | ||||
-rw-r--r-- | libavcodec/sh4/hpeldsp.c | 351 | ||||
-rw-r--r-- | libavcodec/sh4/qpel.c | 862 |
11 files changed, 1 insertions, 1666 deletions
diff --git a/libavcodec/h264chroma.c b/libavcodec/h264chroma.c index 3b780a04e2..ed41afe686 100644 --- a/libavcodec/h264chroma.c +++ b/libavcodec/h264chroma.c @@ -47,8 +47,6 @@ void ff_h264chroma_init(H264ChromaContext *c, int bit_depth) ff_h264chroma_init_arm(c, bit_depth); if (ARCH_PPC) ff_h264chroma_init_ppc(c, bit_depth); - if (ARCH_SH4) - ff_h264chroma_init_sh4(c, bit_depth); if (ARCH_X86) ff_h264chroma_init_x86(c, bit_depth); } diff --git a/libavcodec/h264chroma.h b/libavcodec/h264chroma.h index 4e035b0c38..46fae425ed 100644 --- a/libavcodec/h264chroma.h +++ b/libavcodec/h264chroma.h @@ -32,7 +32,6 @@ void ff_h264chroma_init(H264ChromaContext *c, int bit_depth); void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth); void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth); -void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth); void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth); #endif /* AVCODEC_H264CHROMA_H */ diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c index bff779d827..473102ddde 100644 --- a/libavcodec/hpeldsp.c +++ b/libavcodec/hpeldsp.c @@ -62,8 +62,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) ff_hpeldsp_init_bfin(c, flags); if (ARCH_PPC) ff_hpeldsp_init_ppc(c, flags); - if (ARCH_SH4) - ff_hpeldsp_init_sh4(c, flags); if (HAVE_VIS) ff_hpeldsp_init_vis(c, flags); if (ARCH_X86) diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h index bf868a5c34..ec9006c31d 100644 --- a/libavcodec/hpeldsp.h +++ b/libavcodec/hpeldsp.h @@ -98,7 +98,6 @@ void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags); void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags); void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); -void ff_hpeldsp_init_sh4(HpelDSPContext *c, int flags); void ff_hpeldsp_init_vis(HpelDSPContext *c, int flags); void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags); diff --git a/libavcodec/sh4/Makefile 
b/libavcodec/sh4/Makefile index a24b298ead..01a573b957 100644 --- a/libavcodec/sh4/Makefile +++ b/libavcodec/sh4/Makefile @@ -1,7 +1,2 @@ -OBJS += sh4/dsputil_align.o \ - sh4/dsputil_sh4.o \ +OBJS += sh4/dsputil_sh4.o \ sh4/idct_sh4.o \ - -OBJS-$(CONFIG_H264CHROMA) += sh4/h264chroma_init.o \ - -OBJS-$(CONFIG_HPELDSP) += sh4/hpeldsp.o diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c deleted file mode 100644 index d63001032d..0000000000 --- a/libavcodec/sh4/dsputil_align.c +++ /dev/null @@ -1,298 +0,0 @@ -/* - * aligned/packed access motion - * - * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/attributes.h" -#include "libavcodec/avcodec.h" -#include "libavcodec/dsputil.h" -#include "libavcodec/rnd_avg.h" -#include "dsputil_sh4.h" - - -#define LP(p) *(uint32_t*)(p) -#define LPC(p) *(const uint32_t*)(p) - - -#define UNPACK(ph,pl,tt0,tt1) do { \ - uint32_t t0,t1; t0=tt0;t1=tt1; \ - ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \ - pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0) - -#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03)) -#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03)) - -/* little-endian */ -#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) ) -#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) ) -/* big -#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) ) -#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) ) -*/ - - -#define put(d,s) d = s -#define avg(d,s) d = rnd_avg32(s,d) - -#define OP_C4(ofs) \ - ref-=ofs; \ - do { \ - OP(LP(dest),MERGE1(LPC(ref),LPC(ref+4),ofs)); \ - ref+=stride; \ - dest+=stride; \ - } while(--height) - -#define OP_C40() \ - do { \ - OP(LP(dest),LPC(ref)); \ - ref+=stride; \ - dest+=stride; \ - } while(--height) - -#define OP_C(ofs,sz,avg2) \ -{ \ - ref-=ofs; \ - do { \ - uint32_t t0,t1; \ - t0 = LPC(ref+0); \ - t1 = LPC(ref+4); \ - OP(LP(dest+0), MERGE1(t0,t1,ofs)); \ - t0 = LPC(ref+8); \ - OP(LP(dest+4), MERGE1(t1,t0,ofs)); \ -if (sz==16) { \ - t1 = LPC(ref+12); \ - OP(LP(dest+8), MERGE1(t0,t1,ofs)); \ - t0 = LPC(ref+16); \ - OP(LP(dest+12), MERGE1(t1,t0,ofs)); \ -} \ - ref+=stride; \ - dest+= stride; \ - } while(--height); \ -} - -/* aligned */ 
-#define OP_C0(sz,avg2) \ -{ \ - do { \ - OP(LP(dest+0), LPC(ref+0)); \ - OP(LP(dest+4), LPC(ref+4)); \ -if (sz==16) { \ - OP(LP(dest+8), LPC(ref+8)); \ - OP(LP(dest+12), LPC(ref+12)); \ -} \ - ref+=stride; \ - dest+= stride; \ - } while(--height); \ -} - -#define OP_X(ofs,sz,avg2) \ -{ \ - ref-=ofs; \ - do { \ - uint32_t t0,t1; \ - t0 = LPC(ref+0); \ - t1 = LPC(ref+4); \ - OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ - t0 = LPC(ref+8); \ - OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ -if (sz==16) { \ - t1 = LPC(ref+12); \ - OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ - t0 = LPC(ref+16); \ - OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ -} \ - ref+=stride; \ - dest+= stride; \ - } while(--height); \ -} - -/* aligned */ -#define OP_Y0(sz,avg2) \ -{ \ - uint32_t t0,t1,t2,t3,t; \ -\ - t0 = LPC(ref+0); \ - t1 = LPC(ref+4); \ -if (sz==16) { \ - t2 = LPC(ref+8); \ - t3 = LPC(ref+12); \ -} \ - do { \ - ref += stride; \ -\ - t = LPC(ref+0); \ - OP(LP(dest+0), avg2(t0,t)); t0 = t; \ - t = LPC(ref+4); \ - OP(LP(dest+4), avg2(t1,t)); t1 = t; \ -if (sz==16) { \ - t = LPC(ref+8); \ - OP(LP(dest+8), avg2(t2,t)); t2 = t; \ - t = LPC(ref+12); \ - OP(LP(dest+12), avg2(t3,t)); t3 = t; \ -} \ - dest+= stride; \ - } while(--height); \ -} - -#define OP_Y(ofs,sz,avg2) \ -{ \ - uint32_t t0,t1,t2,t3,t,w0,w1; \ -\ - ref-=ofs; \ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - t0 = MERGE1(w0,w1,ofs); \ - w0 = LPC(ref+8); \ - t1 = MERGE1(w1,w0,ofs); \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - t2 = MERGE1(w0,w1,ofs); \ - w0 = LPC(ref+16); \ - t3 = MERGE1(w1,w0,ofs); \ -} \ - do { \ - ref += stride; \ -\ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - t = MERGE1(w0,w1,ofs); \ - OP(LP(dest+0), avg2(t0,t)); t0 = t; \ - w0 = LPC(ref+8); \ - t = MERGE1(w1,w0,ofs); \ - OP(LP(dest+4), avg2(t1,t)); t1 = t; \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - t = MERGE1(w0,w1,ofs); \ - OP(LP(dest+8), avg2(t2,t)); t2 = t; \ - w0 = LPC(ref+16); \ - t = 
MERGE1(w1,w0,ofs); \ - OP(LP(dest+12), avg2(t3,t)); t3 = t; \ -} \ - dest+=stride; \ - } while(--height); \ -} - -#define OP_X0(sz,avg2) OP_X(0,sz,avg2) -#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK) -#define OP_XY(ofs,sz,PACK) \ -{ \ - uint32_t t2,t3,w0,w1; \ - uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \ -\ - ref -= ofs; \ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - w0 = LPC(ref+8); \ - UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - w0 = LPC(ref+16); \ - UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ -} \ - do { \ - ref+=stride; \ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - OP(LP(dest+0),PACK(a0,a1,t2,t3)); \ - a0 = t2; a1 = t3; \ - w0 = LPC(ref+8); \ - UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ - OP(LP(dest+4),PACK(a2,a3,t2,t3)); \ - a2 = t2; a3 = t3; \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - OP(LP(dest+8),PACK(a4,a5,t2,t3)); \ - a4 = t2; a5 = t3; \ - w0 = LPC(ref+16); \ - UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ - OP(LP(dest+12),PACK(a6,a7,t2,t3)); \ - a6 = t2; a7 = t3; \ -} \ - dest+=stride; \ - } while(--height); \ -} - -#define put_pixels8_c ff_put_rnd_pixels8_o -#define put_pixels16_c ff_put_rnd_pixels16_o -#define avg_pixels8_c ff_avg_rnd_pixels8_o -#define avg_pixels16_c ff_avg_rnd_pixels16_o -#define put_no_rnd_pixels8_c ff_put_rnd_pixels8_o -#define put_no_rnd_pixels16_c ff_put_rnd_pixels16_o -#define avg_no_rnd_pixels16_c ff_avg_rnd_pixels16_o - -#if CONFIG_HPELDSP - -#include "qpel.c" - -#endif - -av_cold void ff_dsputil_init_align(DSPContext *c, AVCodecContext *avctx) -{ -#if CONFIG_HPELDSP - -#define dspfunc(PFX, IDX, NUM) \ - c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_sh4; \ - c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_sh4; \ - c->PFX ## 
_pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_sh4; \ - c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_sh4; \ - c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_sh4; \ - c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_sh4; \ - c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_sh4; \ - c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_sh4; \ - c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_sh4; \ - c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_sh4; \ - c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_sh4; \ - c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_sh4; \ - c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_sh4; \ - c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_sh4; \ - c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_sh4; \ - c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_sh4 - - dspfunc(put_qpel, 0, 16); - dspfunc(put_no_rnd_qpel, 0, 16); - - dspfunc(avg_qpel, 0, 16); - /* dspfunc(avg_no_rnd_qpel, 0, 16); */ - - dspfunc(put_qpel, 1, 8); - dspfunc(put_no_rnd_qpel, 1, 8); - - dspfunc(avg_qpel, 1, 8); - /* dspfunc(avg_no_rnd_qpel, 1, 8); */ - -#undef dspfunc - - c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4; - c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4; - c->put_mspel_pixels_tab[2]= put_mspel8_mc20_sh4; - c->put_mspel_pixels_tab[3]= put_mspel8_mc30_sh4; - c->put_mspel_pixels_tab[4]= put_mspel8_mc02_sh4; - c->put_mspel_pixels_tab[5]= put_mspel8_mc12_sh4; - c->put_mspel_pixels_tab[6]= put_mspel8_mc22_sh4; - c->put_mspel_pixels_tab[7]= put_mspel8_mc32_sh4; - - c->gmc1 = gmc1_c; - -#endif -} diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c index 3d0baf8575..82b75ae6c3 100644 --- a/libavcodec/sh4/dsputil_sh4.c +++ b/libavcodec/sh4/dsputil_sh4.c @@ -94,7 +94,6 @@ av_cold void ff_dsputil_init_sh4(DSPContext *c, AVCodecContext *avctx) { const int idct_algo= avctx->idct_algo; const int high_bit_depth = avctx->bits_per_raw_sample > 8; - ff_dsputil_init_align(c,avctx); if 
(!high_bit_depth) c->clear_blocks = clear_blocks_sh4; diff --git a/libavcodec/sh4/dsputil_sh4.h b/libavcodec/sh4/dsputil_sh4.h index 42f1fe1452..2ba93549fb 100644 --- a/libavcodec/sh4/dsputil_sh4.h +++ b/libavcodec/sh4/dsputil_sh4.h @@ -24,15 +24,5 @@ #include "libavcodec/hpeldsp.h" void ff_idct_sh4(int16_t *block); -void ff_dsputil_init_align(DSPContext* c, AVCodecContext *avctx); - -void ff_put_rnd_pixels8_o(uint8_t *dest, const uint8_t *ref, - const ptrdiff_t stride, int height); -void ff_put_rnd_pixels16_o(uint8_t *dest, const uint8_t *ref, - const ptrdiff_t stride, int height); -void ff_avg_rnd_pixels8_o (uint8_t *dest, const uint8_t *ref, - const ptrdiff_t stride, int height); -void ff_avg_rnd_pixels16_o(uint8_t *dest, const uint8_t *ref, - const ptrdiff_t stride, int height); #endif /* AVCODEC_SH4_DSPUTIL_SH4_H */ diff --git a/libavcodec/sh4/h264chroma_init.c b/libavcodec/sh4/h264chroma_init.c deleted file mode 100644 index d15f0ae34e..0000000000 --- a/libavcodec/sh4/h264chroma_init.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * aligned/packed access motion - * - * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <assert.h> -#include <stdint.h> - -#include "libavutil/attributes.h" -#include "libavcodec/h264chroma.h" - -#define H264_CHROMA_MC(OPNAME, OP)\ -static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - do {\ - int t0,t1,t2,t3; \ - uint8_t *s0 = src; \ - uint8_t *s1 = src+stride; \ - t0 = *s0++; t2 = *s1++; \ - t1 = *s0++; t3 = *s1++; \ - OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ - t0 = *s0++; t2 = *s1++; \ - OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ - dst+= stride;\ - src+= stride;\ - }while(--h);\ -}\ -\ -static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - do {\ - int t0,t1,t2,t3; \ - uint8_t *s0 = src; \ - uint8_t *s1 = src+stride; \ - t0 = *s0++; t2 = *s1++; \ - t1 = *s0++; t3 = *s1++; \ - OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ - t0 = *s0++; t2 = *s1++; \ - OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ - t1 = *s0++; t3 = *s1++; \ - OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ - t0 = *s0++; t2 = *s1++; \ - OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ - dst+= stride;\ - src+= stride;\ - }while(--h);\ -}\ -\ -static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - do 
{\ - int t0,t1,t2,t3; \ - uint8_t *s0 = src; \ - uint8_t *s1 = src+stride; \ - t0 = *s0++; t2 = *s1++; \ - t1 = *s0++; t3 = *s1++; \ - OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ - t0 = *s0++; t2 = *s1++; \ - OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ - t1 = *s0++; t3 = *s1++; \ - OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ - t0 = *s0++; t2 = *s1++; \ - OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ - t1 = *s0++; t3 = *s1++; \ - OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\ - t0 = *s0++; t2 = *s1++; \ - OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\ - t1 = *s0++; t3 = *s1++; \ - OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\ - t0 = *s0++; t2 = *s1++; \ - OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\ - dst+= stride;\ - src+= stride;\ - }while(--h);\ -} - -#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) -#define op_put(a, b) a = (((b) + 32)>>6) - -H264_CHROMA_MC(put_ , op_put) -H264_CHROMA_MC(avg_ , op_avg) -#undef op_avg -#undef op_put - -av_cold void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth) -{ - const int high_bit_depth = bit_depth > 8; - - if (!high_bit_depth) { - c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4; - c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4; - c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4; - c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4; - c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4; - c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4; - } -} diff --git a/libavcodec/sh4/hpeldsp.c b/libavcodec/sh4/hpeldsp.c deleted file mode 100644 index d604b5be0f..0000000000 --- a/libavcodec/sh4/hpeldsp.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - * aligned/packed access motion - * - * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/attributes.h" -#include "libavcodec/avcodec.h" -#include "libavcodec/dsputil.h" -#include "libavcodec/bit_depth_template.c" // for BYTE_VEC32 -#include "libavcodec/hpeldsp.h" -#include "libavcodec/rnd_avg.h" -#include "dsputil_sh4.h" - - -#define LP(p) *(uint32_t*)(p) -#define LPC(p) *(const uint32_t*)(p) - - -#define UNPACK(ph,pl,tt0,tt1) do { \ - uint32_t t0,t1; t0=tt0;t1=tt1; \ - ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \ - pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0) - -#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03)) -#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03)) - -/* little-endian */ -#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) ) -#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) ) -/* big -#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) ) -#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) ) -*/ - - -#define put(d,s) d = s -#define avg(d,s) d = rnd_avg32(s,d) - -#define OP_C4(ofs) \ - ref-=ofs; \ - do { \ - OP(LP(dest),MERGE1(LPC(ref),LPC(ref+4),ofs)); \ - 
ref+=stride; \ - dest+=stride; \ - } while(--height) - -#define OP_C40() \ - do { \ - OP(LP(dest),LPC(ref)); \ - ref+=stride; \ - dest+=stride; \ - } while(--height) - - -#define OP put - -static void put_pixels4_c(uint8_t *dest, const uint8_t *ref, - const int stride, int height) -{ - switch((int)ref&3){ - case 0: OP_C40(); return; - case 1: OP_C4(1); return; - case 2: OP_C4(2); return; - case 3: OP_C4(3); return; - } -} - -#undef OP -#define OP avg - -static void avg_pixels4_c(uint8_t *dest, const uint8_t *ref, - const int stride, int height) -{ - switch((int)ref&3){ - case 0: OP_C40(); return; - case 1: OP_C4(1); return; - case 2: OP_C4(2); return; - case 3: OP_C4(3); return; - } -} - -#undef OP - -#define OP_C(ofs,sz,avg2) \ -{ \ - ref-=ofs; \ - do { \ - uint32_t t0,t1; \ - t0 = LPC(ref+0); \ - t1 = LPC(ref+4); \ - OP(LP(dest+0), MERGE1(t0,t1,ofs)); \ - t0 = LPC(ref+8); \ - OP(LP(dest+4), MERGE1(t1,t0,ofs)); \ -if (sz==16) { \ - t1 = LPC(ref+12); \ - OP(LP(dest+8), MERGE1(t0,t1,ofs)); \ - t0 = LPC(ref+16); \ - OP(LP(dest+12), MERGE1(t1,t0,ofs)); \ -} \ - ref+=stride; \ - dest+= stride; \ - } while(--height); \ -} - -/* aligned */ -#define OP_C0(sz,avg2) \ -{ \ - do { \ - OP(LP(dest+0), LPC(ref+0)); \ - OP(LP(dest+4), LPC(ref+4)); \ -if (sz==16) { \ - OP(LP(dest+8), LPC(ref+8)); \ - OP(LP(dest+12), LPC(ref+12)); \ -} \ - ref+=stride; \ - dest+= stride; \ - } while(--height); \ -} - -#define OP_X(ofs,sz,avg2) \ -{ \ - ref-=ofs; \ - do { \ - uint32_t t0,t1; \ - t0 = LPC(ref+0); \ - t1 = LPC(ref+4); \ - OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ - t0 = LPC(ref+8); \ - OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ -if (sz==16) { \ - t1 = LPC(ref+12); \ - OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ - t0 = LPC(ref+16); \ - OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ -} \ - ref+=stride; \ - dest+= stride; \ - } while(--height); \ -} - -/* aligned */ -#define OP_Y0(sz,avg2) \ -{ \ - uint32_t 
t0,t1,t2,t3,t; \ -\ - t0 = LPC(ref+0); \ - t1 = LPC(ref+4); \ -if (sz==16) { \ - t2 = LPC(ref+8); \ - t3 = LPC(ref+12); \ -} \ - do { \ - ref += stride; \ -\ - t = LPC(ref+0); \ - OP(LP(dest+0), avg2(t0,t)); t0 = t; \ - t = LPC(ref+4); \ - OP(LP(dest+4), avg2(t1,t)); t1 = t; \ -if (sz==16) { \ - t = LPC(ref+8); \ - OP(LP(dest+8), avg2(t2,t)); t2 = t; \ - t = LPC(ref+12); \ - OP(LP(dest+12), avg2(t3,t)); t3 = t; \ -} \ - dest+= stride; \ - } while(--height); \ -} - -#define OP_Y(ofs,sz,avg2) \ -{ \ - uint32_t t0,t1,t2,t3,t,w0,w1; \ -\ - ref-=ofs; \ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - t0 = MERGE1(w0,w1,ofs); \ - w0 = LPC(ref+8); \ - t1 = MERGE1(w1,w0,ofs); \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - t2 = MERGE1(w0,w1,ofs); \ - w0 = LPC(ref+16); \ - t3 = MERGE1(w1,w0,ofs); \ -} \ - do { \ - ref += stride; \ -\ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - t = MERGE1(w0,w1,ofs); \ - OP(LP(dest+0), avg2(t0,t)); t0 = t; \ - w0 = LPC(ref+8); \ - t = MERGE1(w1,w0,ofs); \ - OP(LP(dest+4), avg2(t1,t)); t1 = t; \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - t = MERGE1(w0,w1,ofs); \ - OP(LP(dest+8), avg2(t2,t)); t2 = t; \ - w0 = LPC(ref+16); \ - t = MERGE1(w1,w0,ofs); \ - OP(LP(dest+12), avg2(t3,t)); t3 = t; \ -} \ - dest+=stride; \ - } while(--height); \ -} - -#define OP_X0(sz,avg2) OP_X(0,sz,avg2) -#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK) -#define OP_XY(ofs,sz,PACK) \ -{ \ - uint32_t t2,t3,w0,w1; \ - uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \ -\ - ref -= ofs; \ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - w0 = LPC(ref+8); \ - UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - w0 = LPC(ref+16); \ - UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ -} \ - do { \ - ref+=stride; \ - w0 = LPC(ref+0); \ - w1 = LPC(ref+4); \ - UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - OP(LP(dest+0),PACK(a0,a1,t2,t3)); \ - a0 = t2; a1 = t3; 
\ - w0 = LPC(ref+8); \ - UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ - OP(LP(dest+4),PACK(a2,a3,t2,t3)); \ - a2 = t2; a3 = t3; \ -if (sz==16) { \ - w1 = LPC(ref+12); \ - UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ - OP(LP(dest+8),PACK(a4,a5,t2,t3)); \ - a4 = t2; a5 = t3; \ - w0 = LPC(ref+16); \ - UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ - OP(LP(dest+12),PACK(a6,a7,t2,t3)); \ - a6 = t2; a7 = t3; \ -} \ - dest+=stride; \ - } while(--height); \ -} - -#define DEFFUNC(prefix, op, rnd, xy, sz, OP_N, avgfunc) \ -prefix void op##_##rnd##_pixels##sz##_##xy(uint8_t *dest, const uint8_t *ref, \ - const ptrdiff_t stride, int height) \ -{ \ - switch((int)ref&3) { \ - case 0:OP_N##0(sz,rnd##_##avgfunc); return; \ - case 1:OP_N(1,sz,rnd##_##avgfunc); return; \ - case 2:OP_N(2,sz,rnd##_##avgfunc); return; \ - case 3:OP_N(3,sz,rnd##_##avgfunc); return; \ - } \ -} - -#define OP put - -DEFFUNC( ,ff_put,rnd,o,8,OP_C,avg32) -DEFFUNC(static,put, rnd,x,8,OP_X,avg32) -DEFFUNC(static,put,no_rnd,x,8,OP_X,avg32) -DEFFUNC(static,put, rnd,y,8,OP_Y,avg32) -DEFFUNC(static,put,no_rnd,y,8,OP_Y,avg32) -DEFFUNC(static,put, rnd,xy,8,OP_XY,PACK) -DEFFUNC(static,put,no_rnd,xy,8,OP_XY,PACK) -DEFFUNC( ,ff_put,rnd,o,16,OP_C,avg32) -DEFFUNC(static,put, rnd,x,16,OP_X,avg32) -DEFFUNC(static,put,no_rnd,x,16,OP_X,avg32) -DEFFUNC(static,put, rnd,y,16,OP_Y,avg32) -DEFFUNC(static,put,no_rnd,y,16,OP_Y,avg32) -DEFFUNC(static,put, rnd,xy,16,OP_XY,PACK) -DEFFUNC(static,put,no_rnd,xy,16,OP_XY,PACK) - -#undef OP -#define OP avg - -DEFFUNC( ,ff_avg,rnd,o,8,OP_C,avg32) -DEFFUNC(static,avg, rnd,x,8,OP_X,avg32) -DEFFUNC(static,avg, rnd,y,8,OP_Y,avg32) -DEFFUNC(static,avg, rnd,xy,8,OP_XY,PACK) -DEFFUNC( ,ff_avg,rnd,o,16,OP_C,avg32) -DEFFUNC(static,avg, rnd,x,16,OP_X,avg32) -DEFFUNC(static,avg,no_rnd,x,16,OP_X,avg32) -DEFFUNC(static,avg, rnd,y,16,OP_Y,avg32) -DEFFUNC(static,avg,no_rnd,y,16,OP_Y,avg32) -DEFFUNC(static,avg, rnd,xy,16,OP_XY,PACK) -DEFFUNC(static,avg,no_rnd,xy,16,OP_XY,PACK) - 
-#undef OP - -#define ff_put_no_rnd_pixels8_o ff_put_rnd_pixels8_o -#define ff_put_no_rnd_pixels16_o ff_put_rnd_pixels16_o -#define ff_avg_no_rnd_pixels16_o ff_avg_rnd_pixels16_o - -av_cold void ff_hpeldsp_init_sh4(HpelDSPContext *c, int flags) -{ - c->put_pixels_tab[0][0] = ff_put_rnd_pixels16_o; - c->put_pixels_tab[0][1] = put_rnd_pixels16_x; - c->put_pixels_tab[0][2] = put_rnd_pixels16_y; - c->put_pixels_tab[0][3] = put_rnd_pixels16_xy; - c->put_pixels_tab[1][0] = ff_put_rnd_pixels8_o; - c->put_pixels_tab[1][1] = put_rnd_pixels8_x; - c->put_pixels_tab[1][2] = put_rnd_pixels8_y; - c->put_pixels_tab[1][3] = put_rnd_pixels8_xy; - - c->put_no_rnd_pixels_tab[0][0] = ff_put_no_rnd_pixels16_o; - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y; - c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy; - c->put_no_rnd_pixels_tab[1][0] = ff_put_no_rnd_pixels8_o; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y; - c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy; - - c->avg_pixels_tab[0][0] = ff_avg_rnd_pixels16_o; - c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x; - c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y; - c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy; - c->avg_pixels_tab[1][0] = ff_avg_rnd_pixels8_o; - c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x; - c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y; - c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy; - - c->avg_no_rnd_pixels_tab[0] = ff_avg_no_rnd_pixels16_o; - c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x; - c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y; - c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy; -} diff --git a/libavcodec/sh4/qpel.c b/libavcodec/sh4/qpel.c deleted file mode 100644 index 2675f12a81..0000000000 --- a/libavcodec/sh4/qpel.c +++ /dev/null @@ -1,862 +0,0 @@ -/* - * This is optimized for sh, which have post increment addressing (*p++). 
- * Some CPU may be index (p[n]) faster than post increment (*p++). - * - * copyright (c) 2001-2003 BERO <bero@geocities.co.jp> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/common.h" -#include "libavcodec/copy_block.h" -#include "libavcodec/rnd_avg.h" - -#define PIXOP2(OPNAME, OP) \ -\ -static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ - 
OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ - OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \ - OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do { /* onlye src2 aligned */\ - OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t 
*src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ - OP(LP(dst+8),no_rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{\ - do {\ - OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ - OP(LP(dst+8),rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \ - OP(LP(dst+12),rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{ OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ -\ -static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{ OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ -\ -static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, 
int h) \ -{ OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ -\ -static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ -{ OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ -\ -static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LPC(src1),LPC(src2)); \ - UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ -static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LPC(src1),LPC(src2)); \ - UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ -static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ - UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ - 
UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ -static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ - UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ -static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LPC(src1),LPC(src2)); \ - UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \ - UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \ - UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ - OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ -static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, 
uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LPC(src1),LPC(src2)); \ - UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \ - UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ - OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \ - UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ - OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ -static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { /* src1 is unaligned */\ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ - UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \ - UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \ - UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ - OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ -static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; 
\ - UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ - UNPACK(a2,a3,LPC(src3),LPC(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ - UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \ - UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ - OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \ - UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ - OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ -} \ -\ - -#define op_avg(a, b) a = rnd_avg32(a,b) -#define op_put(a, b) a = b - -PIXOP2(avg, op_avg) -PIXOP2(put, op_put) -#undef op_avg -#undef op_put - -#define avg2(a,b) ((a+b+1)>>1) -#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) - - -static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) -{ - const int A=(16-x16)*(16-y16); - const int B=( x16)*(16-y16); - const int C=(16-x16)*( y16); - const int D=( x16)*( y16); - - do { - int t0,t1,t2,t3; - uint8_t *s0 = src; - uint8_t *s1 = src+stride; - t0 = *s0++; t2 = *s1++; - t1 = *s0++; t3 = *s1++; - dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; - t0 = *s0++; t2 = *s1++; - dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; - t1 = *s0++; t3 = *s1++; - dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; - t0 = *s0++; t2 = *s1++; - dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; - t1 = *s0++; t3 = *s1++; - dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; - t0 = *s0++; t2 = *s1++; - dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; - t1 = *s0++; t3 = *s1++; - dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; - t0 = *s0++; t2 = *s1++; - dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; - dst+= stride; - src+= stride; - }while(--h); -} - -#define QPEL_MC(r, OPNAME, RND, OP) \ -static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t 
*src, int dstStride, int srcStride, int h){\ - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - do {\ - uint8_t *s = src; \ - int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ - src0= *s++;\ - src1= *s++;\ - src2= *s++;\ - src3= *s++;\ - src4= *s++;\ - OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ - src5= *s++;\ - OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ - src6= *s++;\ - OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ - src7= *s++;\ - OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ - src8= *s++;\ - OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ - OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ - OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ - OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ - dst+=dstStride;\ - src+=srcStride;\ - }while(--h);\ -}\ -\ -static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int w=8;\ - do{\ - uint8_t *s = src, *d=dst;\ - int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ - src0 = *s; s+=srcStride; \ - src1 = *s; s+=srcStride; \ - src2 = *s; s+=srcStride; \ - src3 = *s; s+=srcStride; \ - src4 = *s; s+=srcStride; \ - OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\ - src5 = *s; s+=srcStride; \ - OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\ - src6 = *s; s+=srcStride; \ - OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\ - src7 = *s; s+=srcStride; \ - OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\ - src8 = *s; \ - OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\ - OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - 
(src2+src8));d+=dstStride;\ - OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\ - OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ - dst++;\ - src++;\ - }while(--w);\ -}\ -\ -static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - do {\ - uint8_t *s = src;\ - int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ - int src9,src10,src11,src12,src13,src14,src15,src16;\ - src0= *s++;\ - src1= *s++;\ - src2= *s++;\ - src3= *s++;\ - src4= *s++;\ - OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ - src5= *s++;\ - OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ - src6= *s++;\ - OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ - src7= *s++;\ - OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ - src8= *s++;\ - OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ - src9= *s++;\ - OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ - src10= *s++;\ - OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ - src11= *s++;\ - OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ - src12= *s++;\ - OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ - src13= *s++;\ - OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ - src14= *s++;\ - OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ - src15= *s++;\ - OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ - src16= *s++;\ - OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ - OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ - 
OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ - OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ - dst+=dstStride;\ - src+=srcStride;\ - }while(--h);\ -}\ -\ -static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int w=16;\ - do {\ - uint8_t *s = src, *d=dst;\ - int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ - int src9,src10,src11,src12,src13,src14,src15,src16;\ - src0 = *s; s+=srcStride; \ - src1 = *s; s+=srcStride; \ - src2 = *s; s+=srcStride; \ - src3 = *s; s+=srcStride; \ - src4 = *s; s+=srcStride; \ - OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\ - src5 = *s; s+=srcStride; \ - OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\ - src6 = *s; s+=srcStride; \ - OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\ - src7 = *s; s+=srcStride; \ - OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\ - src8 = *s; s+=srcStride; \ - OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\ - src9 = *s; s+=srcStride; \ - OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\ - src10 = *s; s+=srcStride; \ - OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\ - src11 = *s; s+=srcStride; \ - OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\ - src12 = *s; s+=srcStride; \ - OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\ - src13 = *s; s+=srcStride; \ - OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\ - src14 = *s; s+=srcStride; \ - OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 
+src14));d+=dstStride;\ - src15 = *s; s+=srcStride; \ - OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\ - src16 = *s; \ - OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\ - OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\ - OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\ - OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ - dst++;\ - src++;\ - }while(--w);\ -}\ -\ -static void OPNAME ## qpel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## pixels8_c(dst, src, stride, 8);\ -}\ -\ -static void OPNAME ## qpel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[64];\ - put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ - OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\ -}\ -\ -static void OPNAME ## qpel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ -}\ -\ -static void OPNAME ## qpel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[64];\ - put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ - OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\ -}\ -\ -static void OPNAME ## qpel8_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t half[64];\ - copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ - OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\ -}\ -\ -static void OPNAME ## qpel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - copy_block9(full, src, 16, stride, 9);\ - OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ -}\ -\ -static void OPNAME ## qpel8_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t half[64];\ - 
copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ - OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\ -}\ -static void OPNAME ## qpel8_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t halfH[72];\ - uint8_t halfHV[64];\ - copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ -}\ -static void OPNAME ## qpel8_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t halfH[72];\ - uint8_t halfHV[64];\ - copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ -}\ -static void OPNAME ## qpel8_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t halfH[72];\ - uint8_t halfHV[64];\ - copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ -}\ -static void OPNAME ## qpel8_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t halfH[72];\ - uint8_t halfHV[64];\ - copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, 
stride, 8, 8, 8);\ -}\ -static void OPNAME ## qpel8_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t halfH[72];\ - uint8_t halfHV[64];\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ -}\ -static void OPNAME ## qpel8_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t halfH[72];\ - uint8_t halfHV[64];\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ - put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ -}\ -static void OPNAME ## qpel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t halfH[72];\ - copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ - OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ -}\ -static void OPNAME ## qpel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[16*9];\ - uint8_t halfH[72];\ - copy_block9(full, src, 16, stride, 9);\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ - OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ -}\ -static void OPNAME ## qpel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t halfH[72];\ - put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ - OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ -}\ -static void OPNAME ## qpel16_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## pixels16_c(dst, src, stride, 16);\ -}\ -\ -static void OPNAME ## qpel16_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[256];\ - put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ - OPNAME ## pixels16_l2_aligned2(dst, src, half, 
stride, stride, 16, 16);\ -}\ -\ -static void OPNAME ## qpel16_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ -}\ -\ -static void OPNAME ## qpel16_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[256];\ - put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ - OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\ -}\ -\ -static void OPNAME ## qpel16_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t half[256];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ - OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\ -}\ -\ -static void OPNAME ## qpel16_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - copy_block17(full, src, 24, stride, 17);\ - OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ -}\ -\ -static void OPNAME ## qpel16_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t half[256];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ - OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\ -}\ -static void OPNAME ## qpel16_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t halfH[272];\ - uint8_t halfHV[256];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ -}\ -static void OPNAME ## qpel16_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t halfH[272];\ - uint8_t halfHV[256];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, 
full, 16, 24, 17);\ - put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ -}\ -static void OPNAME ## qpel16_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t halfH[272];\ - uint8_t halfHV[256];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ -}\ -static void OPNAME ## qpel16_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t halfH[272];\ - uint8_t halfHV[256];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ -}\ -static void OPNAME ## qpel16_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t halfH[272];\ - uint8_t halfHV[256];\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ -}\ -static void OPNAME ## qpel16_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t halfH[272];\ - uint8_t halfHV[256];\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ - put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ -}\ -static void OPNAME ## qpel16_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t 
halfH[272];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ - OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ -}\ -static void OPNAME ## qpel16_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[24*17];\ - uint8_t halfH[272];\ - copy_block17(full, src, 24, stride, 17);\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ - OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ -}\ -static void OPNAME ## qpel16_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t halfH[272];\ - put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ - OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ -} - -#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) -#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) -#define op_put(a, b) a = cm[((b) + 16)>>5] -#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] - -QPEL_MC(0, put_ , _ , op_put) -QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) -QPEL_MC(0, avg_ , _ , op_avg) -//QPEL_MC(1, avg_no_rnd , _ , op_avg) -#undef op_avg -#undef op_avg_no_rnd -#undef op_put -#undef op_put_no_rnd - -static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - do{ - int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9; - uint8_t *s = src; - src_1 = s[-1]; - src0 = *s++; - src1 = *s++; - src2 = *s++; - dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; - src3 = *s++; - dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; - src4 = *s++; - dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; - src5 = *s++; - dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; - src6 = *s++; - dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; - src7 = *s++; - dst[5]= 
cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; - src8 = *s++; - dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; - src9 = *s++; - dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; - dst+=dstStride; - src+=srcStride; - }while(--h); -} - -static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - do{ - int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9; - uint8_t *s = src,*d = dst; - src_1 = *(s-srcStride); - src0 = *s; s+=srcStride; - src1 = *s; s+=srcStride; - src2 = *s; s+=srcStride; - *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride; - src3 = *s; s+=srcStride; - *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride; - src4 = *s; s+=srcStride; - *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride; - src5 = *s; s+=srcStride; - *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride; - src6 = *s; s+=srcStride; - *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride; - src7 = *s; s+=srcStride; - *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride; - src8 = *s; s+=srcStride; - *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride; - src9 = *s; - *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride; - src++; - dst++; - }while(--w); -} - -static void put_mspel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){ - put_pixels8_c(dst, src, stride, 8); -} - -static void put_mspel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){ - uint8_t half[64]; - wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); - put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8); -} - -static void put_mspel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){ - wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); -} - -static void put_mspel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){ - uint8_t half[64]; - wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); - put_pixels8_l2_aligned2(dst, src+1, half, 
stride, stride, 8, 8); -} - -static void put_mspel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){ - wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); -} - -static void put_mspel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){ - uint8_t halfH[88]; - uint8_t halfV[64]; - uint8_t halfHV[64]; - wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); - wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); - wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); - put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8); -} -static void put_mspel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){ - uint8_t halfH[88]; - uint8_t halfV[64]; - uint8_t halfHV[64]; - wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); - wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); - wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); - put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8); -} -static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){ - uint8_t halfH[88]; - wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); - wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); -} |