Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)

Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk
author: Michael Niedermayer <michaelni@gmx.at> 2002-11-02 11:28:08 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2002-11-02 11:28:08 +0000
commit: 05c4072b45f3cde1185de6eccfe7febf91d9f8fd (patch)
tree: 164979e7e556e44f5678d5599a40810bbf89fed8 /libavcodec/ppc/dsputil_altivec.c
parent: 26b35efb3a0d02a1ef6a8af804e6c59c1a190fa3 (diff)
download: ffmpeg-05c4072b45f3cde1185de6eccfe7febf91d9f8fd.tar.gz
1 files changed, 120 insertions, 6 deletions
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 18d9d27a48..8a50ccb900 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1,15 +1,29 @@
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+ 
 #include "../dsputil.h"
+#include "dsputil_altivec.h"
 
 #if CONFIG_DARWIN
 #include <sys/sysctl.h>
 #endif
 
-int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
-int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
-int pix_sum_altivec(UINT8 * pix, int line_size);
-
-int has_altivec(void);
-
 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
 {
     int i, s;
@@ -127,6 +141,105 @@ int pix_sum_altivec(UINT8 * pix, int line_size)
     return s;
 }
 
+void get_pixels_altivec(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
+{   // Copy 8 rows of 8 pixels (rows are line_size bytes apart) into block, widening each byte to a 16-bit value.
+    int i;
+    vector unsigned char perm, bytes, *pixv;
+    vector unsigned char zero = (vector unsigned char) (0);
+    vector signed short shorts;
+    // One row per iteration: load 16 bytes, widen the first 8, store at byte offset i*16 in block.
+    for(i=0;i<8;i++)
+    {
+        // Read potentially unaligned pixels: vec_lvsl builds a permute mask from the
+        // address of `pixels`, and vec_perm uses it to pick 16 bytes out of the two
+        // adjacent vectors pixv[0]/pixv[1]. We only want 8 of them; the extras are ignored.
+        perm = vec_lvsl(0, pixels);
+        pixv = (vector unsigned char *) pixels;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // Convert the bytes into shorts by interleaving zero bytes with the first 8 pixels.
+        shorts = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Save the row to the block; we assume the block is 16-byte aligned (not checked here).
+        vec_st(shorts, i*16, (vector signed short*)block);
+
+        pixels += line_size;
+    }
+}
+
+void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1,
+        const UINT8 *s2, int stride)
+{   // Store the per-pixel difference s1 - s2 into block as 16-bit values: 8 rows of 8 pixels, both sources stepping by stride per row.
+    int i;
+    vector unsigned char perm, bytes, *pixv;
+    vector unsigned char zero = (vector unsigned char) (0);
+    vector signed short shorts1, shorts2;
+    // 4 iterations, each handling two rows (the body is unrolled by hand), for 8 rows in total.
+    for(i=0;i<4;i++)
+    {
+        // Read potentially unaligned pixels from s1: vec_lvsl builds a permute mask
+        // from the address, and vec_perm uses it to pick 16 bytes out of two adjacent
+        // vectors. We only want 8 of them; the extras are ignored.
+        perm = vec_lvsl(0, s1);
+        pixv = (vector unsigned char *) s1;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // Convert the bytes into shorts by interleaving zero bytes with the first 8 pixels.
+        shorts1 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the same for the second block of pixels (s2).
+        perm = vec_lvsl(0, s2);
+        pixv = (vector unsigned char *) s2;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // Convert the bytes into shorts, as for s1.
+        shorts2 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the subtraction: element-wise s1 row minus s2 row.
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        // Save the row to the block; we assume the block is 16-byte aligned (not checked here).
+        vec_st(shorts1, 0, (vector signed short*)block);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8; // advance the output by one row of 8 elements
+
+
+        // The code below is a copy of the code above: this is a manual
+        // unroll, processing the second row of this iteration.
+
+        // Read potentially unaligned pixels from s1.
+        // As above, 16 bytes are fetched although only 8 are wanted;
+        // the extras are simply ignored.
+        perm = vec_lvsl(0, s1);
+        pixv = (vector unsigned char *) s1;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // Convert the bytes into shorts by interleaving zero bytes with the first 8 pixels.
+        shorts1 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the same for the second block of pixels (s2).
+        perm = vec_lvsl(0, s2);
+        pixv = (vector unsigned char *) s2;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // Convert the bytes into shorts, as for s1.
+        shorts2 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the subtraction: element-wise s1 row minus s2 row.
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        // Save the row to the block; we assume the block is 16-byte aligned (not checked here).
+        vec_st(shorts1, 0, (vector signed short*)block);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8; // advance the output by one row of 8 elements
+    }
+}
+
+
 int has_altivec(void)
 {
 #if CONFIG_DARWIN
@@ -141,3 +254,4 @@ int has_altivec(void)
 #endif
     return 0;
 }
+
author	Michael Niedermayer <michaelni@gmx.at>	2002-11-02 11:28:08 +0000
committer	Michael Niedermayer <michaelni@gmx.at>	2002-11-02 11:28:08 +0000
commit	05c4072b45f3cde1185de6eccfe7febf91d9f8fd (patch)
tree	164979e7e556e44f5678d5599a40810bbf89fed8 /libavcodec/ppc/dsputil_altivec.c
parent	26b35efb3a0d02a1ef6a8af804e6c59c1a190fa3 (diff)
download	ffmpeg-05c4072b45f3cde1185de6eccfe7febf91d9f8fd.tar.gz