aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/i386/dsputil_mmx.c
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2002-09-11 12:39:53 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2002-09-11 12:39:53 +0000
commit: b3184779924e40e82b1f92b4b315b2c4074a9669 (patch)
tree: 6f76a3ff7ce70d6d424f60206de7496f3845873f /libavcodec/i386/dsputil_mmx.c
parent: 6b460aa387530feefc91302c150a3405997e61cf (diff)
download: ffmpeg-b3184779924e40e82b1f92b4b315b2c4074a9669.tar.gz
put/avg_pixels16
fixing 2 small qpel bugs
Originally committed as revision 915 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputil_mmx.c')
-rw-r--r-- libavcodec/i386/dsputil_mmx.c 167
1 files changed, 121 insertions, 46 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index fed1818743..4336e4bde0 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -343,7 +343,7 @@ static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
} while (--i);
}
-static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void put_pixels8_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
"lea (%3, %3), %%eax \n\t"
@@ -369,6 +369,40 @@ static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int
);
}
+static void put_pixels16_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) /* copy h rows of 16 bytes each from pixels to block using MMX moves */
+{ /* both pointers step by line_size bytes per row; each loop pass handles 4 rows */
+ __asm __volatile(
+ "lea (%3, %3), %%eax \n\t" /* eax = 2 * line_size, the pointer step for two rows */
+ ".balign 8 \n\t" /* align the loop entry to 8 bytes */
+ "1: \n\t" /* loop head: rows 0-1 of this pass */
+ "movq (%1), %%mm0 \n\t" /* row 0, source bytes 0-7 */
+ "movq 8(%1), %%mm4 \n\t" /* row 0, source bytes 8-15 */
+ "movq (%1, %3), %%mm1 \n\t" /* row 1, source bytes 0-7 */
+ "movq 8(%1, %3), %%mm5 \n\t" /* row 1, source bytes 8-15 */
+ "movq %%mm0, (%2) \n\t" /* store row 0, bytes 0-7 */
+ "movq %%mm4, 8(%2) \n\t" /* store row 0, bytes 8-15 */
+ "movq %%mm1, (%2, %3) \n\t" /* store row 1, bytes 0-7 */
+ "movq %%mm5, 8(%2, %3) \n\t" /* store row 1, bytes 8-15 */
+ "addl %%eax, %1 \n\t" /* pixels += 2 * line_size; NOTE(review): 32-bit add on a pointer operand, presumably x86-32 only - confirm */
+ "addl %%eax, %2 \n\t" /* block += 2 * line_size */
+ "movq (%1), %%mm0 \n\t" /* rows 2-3: same load/store pattern as above */
+ "movq 8(%1), %%mm4 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq 8(%1, %3), %%mm5 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm4, 8(%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq %%mm5, 8(%2, %3) \n\t"
+ "addl %%eax, %1 \n\t" /* pixels += 2 * line_size */
+ "addl %%eax, %2 \n\t" /* block += 2 * line_size */
+ "subl $4, %0 \n\t" /* h -= 4: four rows were copied in this pass */
+ "jnz 1b \n\t" /* repeat until h is exactly 0, so h must be a positive multiple of 4 */
+ : "+g"(h), "+r" (pixels), "+r" (block) /* read-write operands: %0 = h, %1 = pixels, %2 = block */
+ : "r"(line_size) /* input operand: %3 = line_size */
+ : "%eax", "memory" /* eax is used as scratch; the asm writes memory through block */
+ );
+}
+
static void clear_blocks_mmx(DCTELEM *blocks)
{
__asm __volatile(
@@ -424,25 +458,45 @@ void dsputil_init_mmx(void)
pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
- put_pixels_tab[0] = put_pixels_mmx;
- put_pixels_tab[1] = put_pixels_x2_mmx;
- put_pixels_tab[2] = put_pixels_y2_mmx;
- put_pixels_tab[3] = put_pixels_xy2_mmx;
-
- put_no_rnd_pixels_tab[0] = put_pixels_mmx;
- put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
- put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
- put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx;
-
- avg_pixels_tab[0] = avg_pixels_mmx;
- avg_pixels_tab[1] = avg_pixels_x2_mmx;
- avg_pixels_tab[2] = avg_pixels_y2_mmx;
- avg_pixels_tab[3] = avg_pixels_xy2_mmx;
-
- avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx;
- avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx;
- avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx;
- avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx;
+ put_pixels_tab[0][0] = put_pixels16_mmx;
+ put_pixels_tab[0][1] = put_pixels16_x2_mmx;
+ put_pixels_tab[0][2] = put_pixels16_y2_mmx;
+ put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
+
+ put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
+ put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
+ put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
+ put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
+
+ avg_pixels_tab[0][0] = avg_pixels16_mmx;
+ avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
+ avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
+ avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
+
+ avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
+ avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
+ avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
+ avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
+
+ put_pixels_tab[1][0] = put_pixels8_mmx;
+ put_pixels_tab[1][1] = put_pixels8_x2_mmx;
+ put_pixels_tab[1][2] = put_pixels8_y2_mmx;
+ put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
+
+ put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
+ put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
+ put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
+ put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
+
+ avg_pixels_tab[1][0] = avg_pixels8_mmx;
+ avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
+ avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
+ avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
+
+ avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
+ avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
+ avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
+ avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
if (mm_flags & MM_MMXEXT) {
pix_abs16x16 = pix_abs16x16_mmx2;
@@ -455,25 +509,45 @@ void dsputil_init_mmx(void)
pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
- put_pixels_tab[1] = put_pixels_x2_mmx2;
- put_pixels_tab[2] = put_pixels_y2_mmx2;
- put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2;
- put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2;
-
- avg_pixels_tab[0] = avg_pixels_mmx2;
- avg_pixels_tab[1] = avg_pixels_x2_mmx2;
- avg_pixels_tab[2] = avg_pixels_y2_mmx2;
- avg_pixels_tab[3] = avg_pixels_xy2_mmx2;
+ put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
+ put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
+ put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
+ put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
+
+ avg_pixels_tab[0][0] = avg_pixels16_mmx2;
+ avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
+ avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
+ avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
+
+ put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
+ put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
+ put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
+ put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+
+ avg_pixels_tab[1][0] = avg_pixels8_mmx2;
+ avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
+ avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+ avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
} else if (mm_flags & MM_3DNOW) {
- put_pixels_tab[1] = put_pixels_x2_3dnow;
- put_pixels_tab[2] = put_pixels_y2_3dnow;
- put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow;
- put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow;
-
- avg_pixels_tab[0] = avg_pixels_3dnow;
- avg_pixels_tab[1] = avg_pixels_x2_3dnow;
- avg_pixels_tab[2] = avg_pixels_y2_3dnow;
- avg_pixels_tab[3] = avg_pixels_xy2_3dnow;
+ put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
+ put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
+ put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
+ put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
+
+ avg_pixels_tab[0][0] = avg_pixels16_3dnow;
+ avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
+ avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
+ avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
+
+ put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
+ put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
+ put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
+ put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
+
+ avg_pixels_tab[1][0] = avg_pixels8_3dnow;
+ avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
+ avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
+ avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
}
/* idct */
@@ -552,21 +626,22 @@ void bit_exact_idct_put(UINT8 *dest, int line_size, INT16 *block){
void dsputil_set_bit_exact_mmx(void)
{
if (mm_flags & MM_MMX) {
- if (mm_flags & MM_MMXEXT) {
- put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
- put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
- avg_pixels_tab[3] = avg_pixels_xy2_mmx;
+
+ /* MMX2 & 3DNOW */
+ put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
+ put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
+ avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
+ put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
+ put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
+ avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
+ if (mm_flags & MM_MMXEXT) {
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
- } else if (mm_flags & MM_3DNOW) {
- put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
- put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
- avg_pixels_tab[3] = avg_pixels_xy2_mmx;
}
#ifdef SIMPLE_IDCT
if(ff_idct_put==gen_idct_put && ff_idct == simple_idct_mmx)