aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/diracdsp_mmx.c
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2014-05-23 00:33:27 -0300
committer Michael Niedermayer <michaelni@gmx.at> 2014-05-23 15:02:42 +0200
commit 6a4832caaede15e3d918b1408ff83fe30324507b (patch)
tree 11cca82a04abc99ed81421f8302a8cf07e78c668 /libavcodec/x86/diracdsp_mmx.c
parent 64bf8455442b229f30fa293c927957fea116335f (diff)
download ffmpeg-6a4832caaede15e3d918b1408ff83fe30324507b.tar.gz
x86/diracdsp: mark all functions as yasm
No inline asm dirac code remains in the tree, so replace every relevant check. This also moves all the dirac functions from dsputil_mmx.c to diracdsp_mmx.c.

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/diracdsp_mmx.c')
-rw-r--r-- libavcodec/x86/diracdsp_mmx.c 88
1 files changed, 72 insertions, 16 deletions
diff --git a/libavcodec/x86/diracdsp_mmx.c b/libavcodec/x86/diracdsp_mmx.c
index a28bb82060..e10feafe40 100644
--- a/libavcodec/x86/diracdsp_mmx.c
+++ b/libavcodec/x86/diracdsp_mmx.c
@@ -18,8 +18,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/x86/cpu.h"
#include "dsputil_x86.h"
#include "diracdsp_mmx.h"
+#include "fpel.h"
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
@@ -56,14 +58,76 @@ HPEL_FILTER(16, sse2)
c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT
+/*
+ * DIRAC_PIXOP(OPNAME2, OPNAME, EXT) defines three wrappers named
+ * ff_<OPNAME2>_dirac_pixels{8,16,32}_<EXT>.
+ *
+ * When h is not a multiple of four, each wrapper forwards all of its
+ * arguments to the matching ff_<OPNAME2>_dirac_pixelsN_c function.
+ * Otherwise it calls <OPNAME>_pixels{8,16}_<EXT> with src[0] as the only
+ * source. The 32-wide wrapper is built from two 16-wide calls, the second
+ * one 16 bytes further into both dst and src[0].
+ */
+#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT) \
+void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
+{ \
+    if ((h & 3) != 0) { \
+        ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h); \
+        return; \
+    } \
+    OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h); \
+} \
+void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
+{ \
+    if ((h & 3) != 0) { \
+        ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h); \
+        return; \
+    } \
+    OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h); \
+} \
+void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
+{ \
+    if ((h & 3) != 0) { \
+        ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h); \
+        return; \
+    } \
+    OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h); \
+    OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h); \
+}
+
+/* Prototype for the 16-wide mmxext averaging routine that the
+ * DIRAC_PIXOP(avg, ff_avg, mmxext) instantiation below calls. */
+void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
+
+/* Instantiate the dirac pixel wrappers: put and avg for mmx, avg only for mmxext. */
+DIRAC_PIXOP(put, ff_put, mmx)
+DIRAC_PIXOP(avg, ff_avg, mmx)
+DIRAC_PIXOP(avg, ff_avg, mmxext)
+
+/*
+ * 16-wide "put" wrapper for SSE2.
+ *
+ * If h is not a multiple of four, the call is forwarded unchanged to
+ * ff_put_dirac_pixels16_c. Otherwise ff_put_pixels16_sse2 is called with
+ * src[0] as the only source.
+ */
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+    if ((h & 3) != 0) {
+        ff_put_dirac_pixels16_c(dst, src, stride, h);
+        return;
+    }
+    ff_put_pixels16_sse2(dst, src[0], stride, h);
+}
+/*
+ * 16-wide "avg" wrapper for SSE2.
+ *
+ * If h is not a multiple of four, the call is forwarded unchanged to
+ * ff_avg_dirac_pixels16_c. Otherwise ff_avg_pixels16_sse2 is called with
+ * src[0] as the only source.
+ */
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+    if ((h & 3) != 0) {
+        ff_avg_dirac_pixels16_c(dst, src, stride, h);
+        return;
+    }
+    ff_avg_pixels16_sse2(dst, src[0], stride, h);
+}
+/*
+ * 32-wide "put" wrapper for SSE2.
+ *
+ * If h is not a multiple of four, the call is forwarded unchanged to
+ * ff_put_dirac_pixels32_c. Otherwise the row is handled as two 16-wide
+ * halves with ff_put_pixels16_sse2, the second half starting 16 bytes
+ * into both dst and src[0].
+ */
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+    if ((h & 3) != 0) {
+        ff_put_dirac_pixels32_c(dst, src, stride, h);
+        return;
+    }
+    ff_put_pixels16_sse2(dst,      src[0],      stride, h);
+    ff_put_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
+}
+/*
+ * 32-wide "avg" wrapper for SSE2.
+ *
+ * If h is not a multiple of four, the call is forwarded unchanged to
+ * ff_avg_dirac_pixels32_c. Otherwise the row is handled as two 16-wide
+ * halves with ff_avg_pixels16_sse2, the second half starting 16 bytes
+ * into both dst and src[0].
+ */
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+    if ((h & 3) != 0) {
+        ff_avg_dirac_pixels32_c(dst, src, stride, h);
+        return;
+    }
+    ff_avg_pixels16_sse2(dst,      src[0],      stride, h);
+    ff_avg_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
+}
+
void ff_diracdsp_init_mmx(DiracDSPContext* c)
{
int mm_flags = av_get_cpu_flags();
- if (!(mm_flags & AV_CPU_FLAG_MMX))
- return;
-
-#if HAVE_YASM
+ if (EXTERNAL_MMX(mm_flags)) {
c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
#if !ARCH_X86_64
c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
@@ -72,33 +136,25 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
c->add_rect_clamped = ff_add_rect_clamped_mmx;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
#endif
-#endif
-
-#if HAVE_MMX_INLINE
PIXFUNC(put, 0, mmx);
PIXFUNC(avg, 0, mmx);
-#endif
+ }
-#if HAVE_MMXEXT_INLINE
- if (mm_flags & AV_CPU_FLAG_MMX2) {
+ if (EXTERNAL_MMXEXT(mm_flags)) {
PIXFUNC(avg, 0, mmxext);
}
-#endif
- if (mm_flags & AV_CPU_FLAG_SSE2) {
-#if HAVE_YASM
+ if (EXTERNAL_SSE2(mm_flags)) {
c->dirac_hpel_filter = dirac_hpel_filter_sse2;
c->add_rect_clamped = ff_add_rect_clamped_sse2;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
-#endif
-#if HAVE_SSE2_INLINE
+
c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
-#endif
}
}