aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Conrad <lessen42@gmail.com>2009-01-05 22:13:42 +0000
committerGuillaume Poirier <gpoirier@mplayerhq.hu>2009-01-05 22:13:42 +0000
commitc663cb0d4fa30be5e942c196714d1d659cfab7aa (patch)
treec89a18c9eb0c5ebc992fd6452490f6d16e84638e
parent52e57500bb4295404a5cf63d4050b8e1ab530479 (diff)
downloadffmpeg-c663cb0d4fa30be5e942c196714d1d659cfab7aa.tar.gz
AltiVec version of h264_idct(8)_dc_add
Patch by David Conrad %lessen42 A gmail P com% Originally committed as revision 16442 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/ppc/h264_altivec.c64
1 files changed, 54 insertions, 10 deletions
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 50a35ca6b0..67b20f062e 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -596,19 +596,61 @@ void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
}
-// TODO: implement this in AltiVec
-static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride) {
- int i, j;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
- int dc = (block[0] + 32) >> 6;
- for( j = 0; j < 8; j++ )
- {
- for( i = 0; i < 8; i++ )
- dst[i] = cm[ dst[i] + dc ];
- dst += stride;
+static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
+{
+ vec_s16 dc16;
+ vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
+ LOAD_ZERO;
+ DECLARE_ALIGNED_16(int, dc);
+ int i;
+
+ dc = (block[0] + 32) >> 6;
+ dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);
+
+ if (size == 4)
+ dc16 = vec_sld(dc16, zero_s16v, 8);
+ dcplus = vec_packsu(dc16, zero_s16v);
+ dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);
+
+ aligner = vec_lvsr(0, dst);
+ dcplus = vec_perm(dcplus, dcplus, aligner);
+ dcminus = vec_perm(dcminus, dcminus, aligner);
+
+ for (i = 0; i < size; i += 4) {
+ v0 = vec_ld(0, dst+0*stride);
+ v1 = vec_ld(0, dst+1*stride);
+ v2 = vec_ld(0, dst+2*stride);
+ v3 = vec_ld(0, dst+3*stride);
+
+ v0 = vec_adds(v0, dcplus);
+ v1 = vec_adds(v1, dcplus);
+ v2 = vec_adds(v2, dcplus);
+ v3 = vec_adds(v3, dcplus);
+
+ v0 = vec_subs(v0, dcminus);
+ v1 = vec_subs(v1, dcminus);
+ v2 = vec_subs(v2, dcminus);
+ v3 = vec_subs(v3, dcminus);
+
+ vec_st(v0, 0, dst+0*stride);
+ vec_st(v1, 0, dst+1*stride);
+ vec_st(v2, 0, dst+2*stride);
+ vec_st(v3, 0, dst+3*stride);
+
+ dst += 4*stride;
}
}
+static void h264_idct_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
+{
+ h264_idct_dc_add_internal(dst, block, stride, 4);
+}
+
+static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
+{
+ h264_idct_dc_add_internal(dst, block, stride, 8);
+}
+
static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
int i;
for(i=0; i<16; i+=4){
@@ -903,6 +945,8 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
h264_idct_add16, h264_idct_add16intra, h264_idct_add8 are implemented
c->h264_idct_add = ff_h264_idct_add_altivec;
*/
+ c->h264_idct_dc_add= h264_idct_dc_add_altivec;
+ c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec;
c->h264_idct8_add = ff_h264_idct8_add_altivec;
c->h264_idct8_add4 = ff_h264_idct8_add4_altivec;
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;