diff options
author | Shivraj Patil <shivraj.patil@imgtec.com> | 2015-06-14 23:26:26 +0530 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2015-06-29 12:03:43 +0200 |
commit | d9deae04a78b6b698b90d050a67a3bd9155aba74 (patch) | |
tree | bd206629dbbe92e8704746195c793abca9f7f91a | |
parent | 21cede9e970aca7aa0981f7be8378d48fd8e7730 (diff) | |
download | ffmpeg-d9deae04a78b6b698b90d050a67a3bd9155aba74.tar.gz |
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for pixblock functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for the pixblock functions in the new file libavcodec/mips/pixblockdsp_msa.c.
It also adds the new generic macros needed by this patch to libavutil/mips/generic_macros_msa.h.
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/mips/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/mips/pixblockdsp_init_mips.c | 53 | ||||
-rw-r--r-- | libavcodec/mips/pixblockdsp_mips.h | 33 | ||||
-rw-r--r-- | libavcodec/mips/pixblockdsp_msa.c | 143 | ||||
-rw-r--r-- | libavcodec/pixblockdsp.c | 2 | ||||
-rw-r--r-- | libavcodec/pixblockdsp.h | 2 | ||||
-rw-r--r-- | libavutil/mips/generic_macros_msa.h | 8 |
7 files changed, 243 insertions, 0 deletions
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 823a2c56e0..c0ecb15656 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -28,6 +28,7 @@ OBJS-$(CONFIG_H263DSP) += mips/h263dsp_init_mips.o OBJS-$(CONFIG_QPELDSP) += mips/qpeldsp_init_mips.o OBJS-$(CONFIG_HPELDSP) += mips/hpeldsp_init_mips.o OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o +OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \ mips/hevc_mc_uni_msa.o \ mips/hevc_mc_uniw_msa.o \ @@ -45,5 +46,6 @@ MSA-OBJS-$(CONFIG_H263DSP) += mips/h263dsp_msa.o MSA-OBJS-$(CONFIG_QPELDSP) += mips/qpeldsp_msa.o MSA-OBJS-$(CONFIG_HPELDSP) += mips/hpeldsp_msa.o MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o +MSA-OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/pixblockdsp_init_mips.c b/libavcodec/mips/pixblockdsp_init_mips.c new file mode 100644 index 0000000000..0f2fb15180 --- /dev/null +++ b/libavcodec/mips/pixblockdsp_init_mips.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "pixblockdsp_mips.h" + +#if HAVE_MSA +static av_cold void pixblockdsp_init_msa(PixblockDSPContext *c, + AVCodecContext *avctx, + unsigned high_bit_depth) +{ + c->diff_pixels = ff_diff_pixels_msa; + + switch (avctx->bits_per_raw_sample) { + case 9: + case 10: + case 12: + case 14: + c->get_pixels = ff_get_pixels_16_msa; + break; + default: + if (avctx->bits_per_raw_sample <= 8 || avctx->codec_type != + AVMEDIA_TYPE_VIDEO) { + c->get_pixels = ff_get_pixels_8_msa; + } + break; + } +} +#endif // #if HAVE_MSA + +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ +#if HAVE_MSA + pixblockdsp_init_msa(c, avctx, high_bit_depth); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/pixblockdsp_mips.h b/libavcodec/mips/pixblockdsp_mips.h new file mode 100644 index 0000000000..3eee6e0159 --- /dev/null +++ b/libavcodec/mips/pixblockdsp_mips.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H +#define AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H + +#include "../mpegvideo.h" + +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1, + const uint8_t *src2, int stride); +void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src, + ptrdiff_t stride); +void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src, + ptrdiff_t stride); + +#endif // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H diff --git a/libavcodec/mips/pixblockdsp_msa.c b/libavcodec/mips/pixblockdsp_msa.c new file mode 100644 index 0000000000..966e11a7f5 --- /dev/null +++ b/libavcodec/mips/pixblockdsp_msa.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mips/generic_macros_msa.h" +#include "pixblockdsp_mips.h" + +static void diff_pixels_msa(int16_t *block, const uint8_t *src1, + const uint8_t *src2, int32_t stride) +{ + v16u8 in10, in11, in12, in13, in14, in15, in16, in17; + v16u8 in20, in21, in22, in23, in24, in25, in26, in27; + v8i16 out0, out1, out2, out3, out4, out5, out6, out7; + + LD_UB8(src1, stride, in10, in11, in12, in13, in14, in15, in16, in17); + LD_UB8(src2, stride, in20, in21, in22, in23, in24, in25, in26, in27); + ILVR_B4_SH(in10, in20, in11, in21, in12, in22, in13, in23, + out0, out1, out2, out3); + ILVR_B4_SH(in14, in24, in15, in25, in16, in26, in17, in27, + out4, out5, out6, out7); + HSUB_UB4_SH(out0, out1, out2, out3, out0, out1, out2, out3); + HSUB_UB4_SH(out4, out5, out6, out7, out4, out5, out6, out7); + ST_SH8(out0, out1, out2, out3, out4, out5, out6, out7, block, 8); +} + +static void copy_8bit_to_16bit_width8_msa(const uint8_t *src, int32_t src_stride, + int16_t *dst, int32_t dst_stride, + int32_t height) +{ + uint8_t *dst_ptr; + int32_t cnt; + v16u8 src0, src1, src2, src3; + v16i8 zero = { 0 }; + + dst_ptr = (uint8_t *) dst; + + for (cnt = (height >> 2); cnt--;) { + LD_UB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + + ILVR_B4_UB(zero, src0, zero, src1, zero, src2, zero, src3, + src0, src1, src2, src3); + + ST_UB4(src0, src1, src2, src3, dst_ptr, (dst_stride * 2)); + dst_ptr += (4 * 2 * dst_stride); + } +} + +static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + int32_t height, int32_t width) +{ + int32_t cnt, loop_cnt; + const uint8_t *src_tmp; + uint8_t *dst_tmp; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + + for (cnt = (width >> 4); cnt--;) { + 
src_tmp = src; + dst_tmp = dst; + + for (loop_cnt = (height >> 3); loop_cnt--;) { + LD_UB8(src_tmp, src_stride, + src0, src1, src2, src3, src4, src5, src6, src7); + src_tmp += (8 * src_stride); + + ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, + dst_tmp, dst_stride); + dst_tmp += (8 * dst_stride); + } + + src += 16; + dst += 16; + } +} + +static void copy_width16_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + int32_t height) +{ + int32_t cnt; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + + if (0 == height % 12) { + for (cnt = (height / 12); cnt--;) { + LD_UB8(src, src_stride, + src0, src1, src2, src3, src4, src5, src6, src7); + src += (8 * src_stride); + ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, + dst, dst_stride); + dst += (8 * dst_stride); + + LD_UB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + dst += (4 * dst_stride); + } + } else if (0 == height % 8) { + copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); + } else if (0 == height % 4) { + for (cnt = (height >> 2); cnt--;) { + LD_UB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + dst += (4 * dst_stride); + } + } +} + +void ff_get_pixels_16_msa(int16_t *av_restrict dest, const uint8_t *src, + ptrdiff_t stride) +{ + copy_width16_msa(src, stride, (uint8_t *) dest, 16, 8); +} + +void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src, + ptrdiff_t stride) +{ + copy_8bit_to_16bit_width8_msa(src, stride, dest, 8, 8); +} + +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1, + const uint8_t *src2, int stride) +{ + diff_pixels_msa(block, src1, src2, stride); +} diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c index ebde68b6a4..322e1dd111 100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@ -79,4 +79,6 @@ av_cold 
void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth); if (ARCH_X86) ff_pixblockdsp_init_x86(c, avctx, high_bit_depth); + if (ARCH_MIPS) + ff_pixblockdsp_init_mips(c, avctx, high_bit_depth); } diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index d4b8590341..79ed86c3a6 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ -42,5 +42,7 @@ void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); #endif /* AVCODEC_PIXBLOCKDSP_H */ diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index 272a42316f..1738c522f3 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -1206,6 +1206,14 @@ #define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__) #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__) +#define HSUB_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ +{ \ + HSUB_UB2(RTYPE, in0, in1, out0, out1); \ + HSUB_UB2(RTYPE, in2, in3, out2, out3); \ +} +#define HSUB_UB4_UH(...) HSUB_UB4(v8u16, __VA_ARGS__) +#define HSUB_UB4_SH(...) HSUB_UB4(v8i16, __VA_ARGS__) + /* Description : Insert specified word elements from input vectors to 1 destination vector Arguments : Inputs - in0, in1, in2, in3 (4 input vectors) |