diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2012-03-01 21:35:22 -0800 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2012-03-03 20:39:59 -0800 |
commit | e25be4715463da3abdb99acf735bb2148c3bd5c8 (patch) | |
tree | c1676b54bf29f5ab894d81435ef954e45bb7ee7a /libavcodec/x86/vp8dsp-init.c | |
parent | 8249a23fc1f33fa8b3c67b5cdb0050cc6d0013d6 (diff) | |
download | ffmpeg-e25be4715463da3abdb99acf735bb2148c3bd5c8.tar.gz |
vp8: convert idct/mc x86 assembly to use cpuflags().
Diffstat (limited to 'libavcodec/x86/vp8dsp-init.c')
-rw-r--r-- | libavcodec/x86/vp8dsp-init.c | 112 |
1 files changed, 56 insertions, 56 deletions
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c index 3e05bb2fb9..d3f1456b71 100644 --- a/libavcodec/x86/vp8dsp-init.c +++ b/libavcodec/x86/vp8dsp-init.c @@ -29,16 +29,16 @@ /* * MC functions */ -extern void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_h4_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_h6_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_v4_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_v6_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); @@ -80,7 +80,7 @@ extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_bilinear4_h_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, ptrdiff_t dststride, @@ -93,7 +93,7 @@ extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_bilinear4_v_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, ptrdiff_t dststride, @@ -139,27 +139,27 @@ static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ } #if ARCH_X86_32 -TAP_W8 (mmxext, epel, h4) -TAP_W8 (mmxext, epel, h6) -TAP_W16(mmxext, epel, h6) -TAP_W8 (mmxext, epel, v4) -TAP_W8 (mmxext, epel, v6) -TAP_W16(mmxext, epel, v6) -TAP_W8 (mmxext, bilinear, h) -TAP_W16(mmxext, bilinear, h) -TAP_W8 (mmxext, bilinear, v) -TAP_W16(mmxext, bilinear, v) +TAP_W8 (mmx2, epel, h4) +TAP_W8 (mmx2, epel, h6) +TAP_W16(mmx2, epel, h6) +TAP_W8 (mmx2, epel, v4) +TAP_W8 (mmx2, epel, v6) +TAP_W16(mmx2, epel, v6) +TAP_W8 (mmx2, bilinear, h) +TAP_W16(mmx2, bilinear, h) +TAP_W8 (mmx2, bilinear, v) +TAP_W16(mmx2, bilinear, v) #endif -TAP_W16(sse2, epel, h6) -TAP_W16(sse2, epel, v6) -TAP_W16(sse2, bilinear, h) -TAP_W16(sse2, bilinear, v) +TAP_W16(sse2, epel, h6) +TAP_W16(sse2, epel, v6) +TAP_W16(sse2, bilinear, h) +TAP_W16(sse2, bilinear, v) -TAP_W16(ssse3, epel, h6) -TAP_W16(ssse3, epel, v6) -TAP_W16(ssse3, bilinear, h) -TAP_W16(ssse3, bilinear, v) +TAP_W16(ssse3, epel, h6) +TAP_W16(ssse3, epel, v6) +TAP_W16(ssse3, bilinear, h) +TAP_W16(ssse3, bilinear, v) #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \ static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \ @@ -177,13 +177,13 @@ static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT #if ARCH_X86_32 #define HVTAPMMX(x, y) \ -HVTAP(mmxext, 8, x, y, 4, 8) \ -HVTAP(mmxext, 8, x, y, 8, 16) +HVTAP(mmx2, 8, x, y, 4, 8) \ +HVTAP(mmx2, 8, x, y, 8, 16) -HVTAP(mmxext, 8, 6, 6, 16, 16) +HVTAP(mmx2, 8, 6, 6, 16, 16) #else #define HVTAPMMX(x, y) \ -HVTAP(mmxext, 8, x, y, 4, 8) +HVTAP(mmx2, 8, x, y, 4, 8) #endif HVTAPMMX(4, 4) @@ -218,16 +218,16 @@ static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ dst, dststride, tmp, SIZE, height, mx, my); \ } -HVBILIN(mmxext, 8, 4, 8) +HVBILIN(mmx2, 8, 4, 8) #if ARCH_X86_32 -HVBILIN(mmxext, 8, 8, 16) -HVBILIN(mmxext, 8, 16, 16) +HVBILIN(mmx2, 8, 8, 16) +HVBILIN(mmx2, 8, 16, 16) #endif -HVBILIN(sse2, 8, 8, 16) -HVBILIN(sse2, 8, 16, 16) -HVBILIN(ssse3, 8, 4, 8) -HVBILIN(ssse3, 8, 8, 16) -HVBILIN(ssse3, 8, 16, 16) +HVBILIN(sse2, 8, 8, 16) +HVBILIN(sse2, 8, 16, 16) +HVBILIN(ssse3, 8, 4, 8) +HVBILIN(ssse3, 8, 8, 16) +HVBILIN(ssse3, 8, 16, 16) extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); @@ -283,7 +283,7 @@ extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \ int e, int i, int hvt); DECLARE_LOOP_FILTER(mmx) -DECLARE_LOOP_FILTER(mmxext) +DECLARE_LOOP_FILTER(mmx2) DECLARE_LOOP_FILTER(sse2) DECLARE_LOOP_FILTER(ssse3) DECLARE_LOOP_FILTER(sse4) @@ -351,26 +351,26 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) /* note that 4-tap width=16 functions are missing because w=16 * is only used for luma, and luma is always a copy or sixtap. */ if (mm_flags & AV_CPU_FLAG_MMX2) { - VP8_MC_FUNC(2, 4, mmxext); - VP8_BILINEAR_MC_FUNC(2, 4, mmxext); + VP8_MC_FUNC(2, 4, mmx2); + VP8_BILINEAR_MC_FUNC(2, 4, mmx2); #if ARCH_X86_32 - VP8_LUMA_MC_FUNC(0, 16, mmxext); - VP8_MC_FUNC(1, 8, mmxext); - VP8_BILINEAR_MC_FUNC(0, 16, mmxext); - VP8_BILINEAR_MC_FUNC(1, 8, mmxext); - - c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; - - c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; - c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; - c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; - c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; - - c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; - c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; + VP8_LUMA_MC_FUNC(0, 16, mmx2); + VP8_MC_FUNC(1, 8, mmx2); + VP8_BILINEAR_MC_FUNC(0, 16, mmx2); + VP8_BILINEAR_MC_FUNC(1, 8, mmx2); + + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx2; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx2; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx2; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx2; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx2; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx2; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx2; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx2; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx2; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx2; #endif } |