aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author: Loren Merritt <lorenm@u.washington.edu> 2008-02-03 17:04:33 +0000
committer: Loren Merritt <lorenm@u.washington.edu> 2008-02-03 17:04:33 +0000
commit: b313e8159c90db8cf60f9771e85186cf46074e8f (patch)
tree: 44cdf47f5386c1dfd82a69c7e25f5e2250f91793 /libavcodec
parent: 6c01d0069d6ee516c31e92ccffbc5ef99a86b90c (diff)
download: ffmpeg-b313e8159c90db8cf60f9771e85186cf46074e8f.tar.gz
avg_pixels4_mmx2
Originally committed as revision 11829 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/i386/dsputil_mmx.c | 2
-rw-r--r-- libavcodec/i386/dsputil_mmx_avg.h | 25
2 files changed, 25 insertions, 2 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 7c05d870b5..5b4622f054 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -206,13 +206,11 @@ DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 };
#define put_pixels16_mmx2 put_pixels16_mmx
#define put_pixels8_mmx2 put_pixels8_mmx
#define put_pixels4_mmx2 put_pixels4_mmx
-#define avg_pixels4_mmx2 avg_pixels4_mmx
#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
#define put_pixels16_3dnow put_pixels16_mmx
#define put_pixels8_3dnow put_pixels8_mmx
#define put_pixels4_3dnow put_pixels4_mmx
-#define avg_pixels4_3dnow avg_pixels4_mmx
#define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
#define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
diff --git a/libavcodec/i386/dsputil_mmx_avg.h b/libavcodec/i386/dsputil_mmx_avg.h
index cb70b9ac7a..970673aade 100644
--- a/libavcodec/i386/dsputil_mmx_avg.h
+++ b/libavcodec/i386/dsputil_mmx_avg.h
@@ -795,6 +795,31 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
:"%"REG_a, "memory");
}
+static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) /* Combine 4-byte-wide rows of pixels into block in place using PAVGB; line_size is the byte stride applied to both pointers, h the row count. */
+{ /* DEF() and PAVGB are macros defined outside this hunk; presumably they select the per-CPU function suffix and the packed-byte-average mnemonic -- confirm in the including file. */
+ do { /* Each pass handles four rows; being a do/while, one pass always runs, so h is expected to be a positive multiple of 4. */
+ asm volatile(
+ "movd (%1), %%mm0 \n\t" /* %1 is block: load the current 4 destination bytes of row 0 */
+ "movd (%1, %2), %%mm1 \n\t" /* row 1: block + line_size */
+ "movd (%1, %2, 2), %%mm2 \n\t" /* row 2: block + 2*line_size */
+ "movd (%1, %3), %%mm3 \n\t" /* row 3: block + 3*line_size (%3 is the precomputed 3*line_size) */
+ PAVGB" (%0), %%mm0 \n\t" /* %0 is pixels: combine source row 0 into mm0. NOTE(review): a memory operand here is likely read as 8 bytes although only 4 are stored back -- confirm the over-read of pixels is acceptable. */
+ PAVGB" (%0, %2), %%mm1 \n\t" /* source row 1 */
+ PAVGB" (%0, %2, 2), %%mm2 \n\t" /* source row 2 */
+ PAVGB" (%0, %3), %%mm3 \n\t" /* source row 3 */
+ "movd %%mm0, (%1) \n\t" /* write the low 4 bytes of each result back to block */
+ "movd %%mm1, (%1, %2) \n\t"
+ "movd %%mm2, (%1, %2, 2) \n\t"
+ "movd %%mm3, (%1, %3) \n\t"
+ ::"S"(pixels), "D"(block), /* inputs only: %0 = pixels (source index register), %1 = block (destination index register) */
+ "r" ((long)line_size), "r"(3L*line_size) /* %2 = line_size, %3 = 3*line_size, both widened to long for use in address expressions */
+ :"memory"); /* the asm writes through block, so memory is declared clobbered; mm0-mm3 are not listed as clobbers */
+ block += 4*line_size; /* advance both pointers past the four rows just processed */
+ pixels += 4*line_size;
+ h -= 4;
+ } while(h > 0);
+}
+
//FIXME the following could be optimized too ...
static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h);