diff options
author | Timothy Gu <timothygu99@gmail.com> | 2016-02-01 20:50:22 -0800 |
---|---|---|
committer | Timothy Gu <timothygu99@gmail.com> | 2016-02-05 19:29:43 -0800 |
commit | 17ab8f7e6852a9db46aec3267bc8a7e40dde849f (patch) | |
tree | b079118165931a0bbc7ea8a62db051bc7f102b8a /libavcodec/x86/diracdsp_init.c | |
parent | 1c40bccc0949f24bde1cd51194367a4a9fd490c5 (diff) | |
download | ffmpeg-17ab8f7e6852a9db46aec3267bc8a7e40dde849f.tar.gz |
diracdsp: Make x86 files/functions names consistent
Diffstat (limited to 'libavcodec/x86/diracdsp_init.c')
-rw-r--r-- | libavcodec/x86/diracdsp_init.c | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c new file mode 100644 index 0000000000..5fae79891b --- /dev/null +++ b/libavcodec/x86/diracdsp_init.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2010 David Conrad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86/cpu.h" +#include "libavcodec/diracdsp.h" +#include "fpel.h" + +DECL_DIRAC_PIXOP(put, mmx); +DECL_DIRAC_PIXOP(avg, mmx); +DECL_DIRAC_PIXOP(avg, mmxext); + +void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h); +void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h); +void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h); +void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h); + +void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int); +void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int); + +void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); +void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); +void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); + +void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); +void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); + +void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); +void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); +void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); +void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); + +#if HAVE_YASM + +#define HPEL_FILTER(MMSIZE, EXT) \ + void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \ + void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \ + \ + static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \ + const uint8_t *src, int stride, int width, int height) \ + { \ + while( height-- ) \ + { \ + ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE, stride, width+MMSIZE+5); \ + ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \ + ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \ + \ + dsth += stride; \ + dstv += stride; \ + dstc += stride; \ + src += stride; \ + } \ + } + +#define PIXFUNC(PFX, IDX, EXT) \ + /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/ \ + c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \ + c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT + +#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\ +void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ +{\ + if (h&3)\ + ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\ + else\ + OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\ +}\ +void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ +{\ + if (h&3)\ + ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\ + else\ + OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ +}\ +void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ +{\ + if (h&3) {\ + ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\ + } else {\ + OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ + OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ + }\ +} + +DIRAC_PIXOP(put, ff_put, mmx) +DIRAC_PIXOP(avg, ff_avg, mmx) +DIRAC_PIXOP(avg, ff_avg, mmxext) + +void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) +{ + if (h&3) + ff_put_dirac_pixels16_c(dst, src, stride, h); + else + ff_put_pixels16_sse2(dst, src[0], stride, h); +} +void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) +{ + if (h&3) + ff_avg_dirac_pixels16_c(dst, src, stride, h); + else + ff_avg_pixels16_sse2(dst, src[0], stride, h); +} +void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) +{ + if (h&3) { + ff_put_dirac_pixels32_c(dst, src, stride, h); + } else { + ff_put_pixels16_sse2(dst , src[0] , stride, h); + ff_put_pixels16_sse2(dst+16, src[0]+16, stride, h); + } +} +void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) +{ + if (h&3) { + ff_avg_dirac_pixels32_c(dst, src, stride, h); + } else { + ff_avg_pixels16_sse2(dst , src[0] , stride, h); + ff_avg_pixels16_sse2(dst+16, src[0]+16, stride, h); + } +} + +#else // HAVE_YASM + +#define HPEL_FILTER(MMSIZE, EXT) \ + void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \ + const uint8_t *src, int stride, int width, int height); + +#define PIXFUNC(PFX, IDX, EXT) do {} while (0) + +#endif // HAVE_YASM + +#if !ARCH_X86_64 +HPEL_FILTER(8, mmx) +#endif +HPEL_FILTER(16, sse2) + +void ff_diracdsp_init_x86(DiracDSPContext* c) +{ + int mm_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(mm_flags)) { + c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; +#if !ARCH_X86_64 + c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx; + c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx; + c->dirac_hpel_filter = dirac_hpel_filter_mmx; + c->add_rect_clamped = ff_add_rect_clamped_mmx; + c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx; +#endif + PIXFUNC(put, 0, mmx); + PIXFUNC(avg, 0, mmx); + } + + if (EXTERNAL_MMXEXT(mm_flags)) { + PIXFUNC(avg, 0, mmxext); + } + + if (EXTERNAL_SSE2(mm_flags)) { + c->dirac_hpel_filter = dirac_hpel_filter_sse2; + c->add_rect_clamped = ff_add_rect_clamped_sse2; + c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2; + + c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; + c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; + + c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2; + c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2; + c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; + c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; + } +} |