diff options
author | Mans Rullgard <mans@mansr.com> | 2012-04-22 15:14:12 +0100 |
---|---|---|
committer | Mans Rullgard <mans@mansr.com> | 2012-04-25 21:41:39 +0100 |
commit | b692d246ead6ed56045904c1f839eb3cbf2cf2a7 (patch) | |
tree | 62a4d8db25f9beb390b9267f780f88ec60a74cc4 | |
parent | dac78fd1d73b3710f1c6718f7418bc5325fff494 (diff) | |
download | ffmpeg-b692d246ead6ed56045904c1f839eb3cbf2cf2a7.tar.gz |
vp8: arm: separate ARMv6 functions from NEON
This is a preparation for complete ARMv6 optimisations.
Signed-off-by: Mans Rullgard <mans@mansr.com>
-rw-r--r-- | libavcodec/arm/Makefile | 7 | ||||
-rw-r--r-- | libavcodec/arm/vp8dsp.h | 78 | ||||
-rw-r--r-- | libavcodec/arm/vp8dsp_armv6.S | 53 | ||||
-rw-r--r-- | libavcodec/arm/vp8dsp_init_arm.c | 144 | ||||
-rw-r--r-- | libavcodec/arm/vp8dsp_init_armv6.c | 34 | ||||
-rw-r--r-- | libavcodec/arm/vp8dsp_init_neon.c | 114 | ||||
-rw-r--r-- | libavcodec/arm/vp8dsp_neon.S | 29 |
7 files changed, 289 insertions, 170 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index e6d92189de..3d002ccdcf 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -13,7 +13,9 @@ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o -ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o +ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ + arm/vp8dsp_init_armv6.o \ + arm/vp8dsp_armv6.o OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o @@ -79,7 +81,8 @@ NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \ arm/vp3dsp_neon.o \ -NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_neon.o +NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_neon.o \ + arm/vp8dsp_neon.o NEON-OBJS += arm/dsputil_init_neon.o \ arm/dsputil_neon.o \ diff --git a/libavcodec/arm/vp8dsp.h b/libavcodec/arm/vp8dsp.h new file mode 100644 index 0000000000..ce00e4abf5 --- /dev/null +++ b/libavcodec/arm/vp8dsp.h @@ -0,0 +1,78 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_ARM_VP8DSP_H +#define AVCODEC_ARM_VP8DSP_H + +#include "libavcodec/vp8dsp.h" + +void ff_vp8dsp_init_armv6(VP8DSPContext *dsp); +void ff_vp8dsp_init_neon(VP8DSPContext *dsp); + +#define VP8_LF_Y(hv, inner, opt) \ + void ff_vp8_##hv##_loop_filter16##inner##_##opt(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim_E, int flim_I, \ + int hev_thresh) + +#define VP8_LF_UV(hv, inner, opt) \ + void ff_vp8_##hv##_loop_filter8uv##inner##_##opt(uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t stride, \ + int flim_E, int flim_I, \ + int hev_thresh) + +#define VP8_LF_SIMPLE(hv, opt) \ + void ff_vp8_##hv##_loop_filter16_simple_##opt(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim) + +#define VP8_LF_HV(inner, opt) \ + VP8_LF_Y(h, inner, opt); \ + VP8_LF_Y(v, inner, opt); \ + VP8_LF_UV(h, inner, opt); \ + VP8_LF_UV(v, inner, opt) + +#define VP8_LF(opt) \ + VP8_LF_HV(, opt); \ + VP8_LF_HV(_inner, opt); \ + VP8_LF_SIMPLE(h, opt); \ + VP8_LF_SIMPLE(v, opt) + +#define VP8_MC(n, opt) \ + void ff_put_vp8_##n##_##opt(uint8_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int h, int x, int y) + +#define VP8_EPEL(w, opt) \ + VP8_MC(pixels ## w, opt); \ + VP8_MC(epel ## w ## _h4, opt); \ + VP8_MC(epel ## w ## _h6, opt); \ + VP8_MC(epel ## w ## _v4, opt); \ + VP8_MC(epel ## w ## _h4v4, opt); \ + VP8_MC(epel ## w ## _h6v4, opt); \ + VP8_MC(epel ## w ## _v6, opt); \ + VP8_MC(epel ## w ## _h4v6, opt); \ + VP8_MC(epel ## w ## _h6v6, opt) + +#define VP8_BILIN(w, opt) \ + VP8_MC(bilin ## w ## _h, opt); \ + VP8_MC(bilin ## w ## _v, opt); \ + VP8_MC(bilin ## w ## _hv, opt) + +#endif /* AVCODEC_ARM_VP8DSP_H */ diff --git a/libavcodec/arm/vp8dsp_armv6.S b/libavcodec/arm/vp8dsp_armv6.S new file mode 100644 index 0000000000..1ed020c069 --- /dev/null +++ b/libavcodec/arm/vp8dsp_armv6.S @@ -0,0 +1,53 @@ +/* + * VP8 ARMv6 optimisations + * + * Copyright (c) 2010 Rob Clark <rob@ti.com> + * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + +function ff_vp8_luma_dc_wht_dc_armv6, export=1 + ldrsh r2, [r1] + mov r3, #0 + add r2, r2, #3 + strh r3, [r1] + asr r2, r2, #3 + .rept 16 + strh r2, [r0], #32 + .endr + bx lr +endfunc + +function ff_put_vp8_pixels4_armv6, export=1 + ldr r12, [sp, #0] @ h + push {r4-r6,lr} +1: + subs r12, r12, #4 + ldr_post r4, r2, r3 + ldr_post r5, r2, r3 + ldr_post r6, r2, r3 + ldr_post lr, r2, r3 + str_post r4, r0, r1 + str_post r5, r0, r1 + str_post r6, r0, r1 + str_post lr, r0, r1 + bgt 1b + pop {r4-r6,pc} +endfunc diff --git a/libavcodec/arm/vp8dsp_init_arm.c b/libavcodec/arm/vp8dsp_init_arm.c index 29c64d4079..603f68cd24 100644 --- a/libavcodec/arm/vp8dsp_init_arm.c +++ b/libavcodec/arm/vp8dsp_init_arm.c @@ -20,148 +20,14 @@ #include "libavutil/arm/cpu.h" #include "libavcodec/vp8dsp.h" - -void ff_vp8_luma_dc_wht_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); -void ff_vp8_luma_dc_wht_dc_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); - -void ff_vp8_idct_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); -void ff_vp8_idct_dc_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); -void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride); -void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride); - -void ff_vp8_v_loop_filter16_neon(uint8_t *dst, ptrdiff_t stride, - int flim_E, int flim_I, int hev_thresh); -void ff_vp8_h_loop_filter16_neon(uint8_t *dst, ptrdiff_t stride, - int flim_E, int flim_I, int hev_thresh); -void ff_vp8_v_loop_filter8uv_neon(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, - int flim_E, int flim_I, int hev_thresh); -void ff_vp8_h_loop_filter8uv_neon(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, - int flim_E, int flim_I, int hev_thresh); - -void ff_vp8_v_loop_filter16_inner_neon(uint8_t *dst, ptrdiff_t stride, - int flim_E, int flim_I, int hev_thresh); -void ff_vp8_h_loop_filter16_inner_neon(uint8_t *dst, ptrdiff_t stride, - int flim_E, int flim_I, int hev_thresh); -void ff_vp8_v_loop_filter8uv_inner_neon(uint8_t *dstU, uint8_t *dstV, - ptrdiff_t stride, int flim_E, int flim_I, - int hev_thresh); -void ff_vp8_h_loop_filter8uv_inner_neon(uint8_t *dstU, uint8_t *dstV, - ptrdiff_t stride, int flim_E, int flim_I, - int hev_thresh); - -void ff_vp8_v_loop_filter16_simple_neon(uint8_t *dst, ptrdiff_t stride, int flim); -void ff_vp8_h_loop_filter16_simple_neon(uint8_t *dst, ptrdiff_t stride, int flim); - - -#define VP8_MC(n) \ - void ff_put_vp8_##n##_neon(uint8_t *dst, ptrdiff_t dststride, \ - uint8_t *src, ptrdiff_t srcstride, \ - int h, int x, int y) - -#define VP8_EPEL(w) \ - VP8_MC(pixels ## w); \ - VP8_MC(epel ## w ## _h4); \ - VP8_MC(epel ## w ## _h6); \ - VP8_MC(epel ## w ## _v4); \ - VP8_MC(epel ## w ## _h4v4); \ - VP8_MC(epel ## w ## _h6v4); \ - VP8_MC(epel ## w ## _v6); \ - VP8_MC(epel ## w ## _h4v6); \ - VP8_MC(epel ## w ## _h6v6) - -VP8_EPEL(16); -VP8_EPEL(8); -VP8_EPEL(4); - -VP8_MC(bilin16_h); -VP8_MC(bilin16_v); -VP8_MC(bilin16_hv); -VP8_MC(bilin8_h); -VP8_MC(bilin8_v); -VP8_MC(bilin8_hv); -VP8_MC(bilin4_h); -VP8_MC(bilin4_v); -VP8_MC(bilin4_hv); +#include "vp8dsp.h" av_cold void ff_vp8dsp_init_arm(VP8DSPContext *dsp) { int cpu_flags = av_get_cpu_flags(); - if (have_neon(cpu_flags)) { - dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_neon; - dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_neon; - - dsp->vp8_idct_add = ff_vp8_idct_add_neon; - dsp->vp8_idct_dc_add = ff_vp8_idct_dc_add_neon; - dsp->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_neon; - dsp->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_neon; - - dsp->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_neon; - dsp->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_neon; - dsp->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_neon; - dsp->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_neon; - - dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_neon; - dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_neon; - dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_neon; - dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_neon; - - dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_neon; - dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_neon; - - dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon; - dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_neon; - dsp->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_neon; - dsp->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_neon; - - dsp->put_vp8_epel_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon; - dsp->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_neon; - dsp->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_neon; - dsp->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_neon; - dsp->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_neon; - dsp->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_neon; - dsp->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_neon; - dsp->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_neon; - dsp->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_neon; - - dsp->put_vp8_epel_pixels_tab[2][0][0] = ff_put_vp8_pixels4_neon; - dsp->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_neon; - dsp->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_neon; - dsp->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_neon; - dsp->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_neon; - dsp->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_neon; - dsp->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_neon; - dsp->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_neon; - dsp->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_neon; - - dsp->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon; - dsp->put_vp8_bilinear_pixels_tab[0][0][1] = ff_put_vp8_bilin16_h_neon; - dsp->put_vp8_bilinear_pixels_tab[0][0][2] = ff_put_vp8_bilin16_h_neon; - dsp->put_vp8_bilinear_pixels_tab[0][1][0] = ff_put_vp8_bilin16_v_neon; - dsp->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilin16_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilin16_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[0][2][0] = ff_put_vp8_bilin16_v_neon; - dsp->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilin16_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilin16_hv_neon; - - dsp->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon; - dsp->put_vp8_bilinear_pixels_tab[1][0][1] = ff_put_vp8_bilin8_h_neon; - dsp->put_vp8_bilinear_pixels_tab[1][0][2] = ff_put_vp8_bilin8_h_neon; - dsp->put_vp8_bilinear_pixels_tab[1][1][0] = ff_put_vp8_bilin8_v_neon; - dsp->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilin8_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilin8_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilin8_v_neon; - dsp->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilin8_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilin8_hv_neon; - - dsp->put_vp8_bilinear_pixels_tab[2][0][0] = ff_put_vp8_pixels4_neon; - dsp->put_vp8_bilinear_pixels_tab[2][0][1] = ff_put_vp8_bilin4_h_neon; - dsp->put_vp8_bilinear_pixels_tab[2][0][2] = ff_put_vp8_bilin4_h_neon; - dsp->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilin4_v_neon; - dsp->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilin4_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilin4_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilin4_v_neon; - dsp->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilin4_hv_neon; - dsp->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilin4_hv_neon; - } + if (have_armv6(cpu_flags)) + ff_vp8dsp_init_armv6(dsp); + if (have_neon(cpu_flags)) + ff_vp8dsp_init_neon(dsp); } diff --git a/libavcodec/arm/vp8dsp_init_armv6.c b/libavcodec/arm/vp8dsp_init_armv6.c new file mode 100644 index 0000000000..a1dcef57ee --- /dev/null +++ b/libavcodec/arm/vp8dsp_init_armv6.c @@ -0,0 +1,34 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include "libavcodec/vp8dsp.h" +#include "vp8dsp.h" + +void ff_vp8_luma_dc_wht_dc_armv6(DCTELEM block[4][4][16], DCTELEM dc[16]); + +VP8_MC(pixels4, armv6); + +av_cold void ff_vp8dsp_init_armv6(VP8DSPContext *dsp) +{ + dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_armv6; + + dsp->put_vp8_epel_pixels_tab[2][0][0] = ff_put_vp8_pixels4_armv6; + + dsp->put_vp8_bilinear_pixels_tab[2][0][0] = ff_put_vp8_pixels4_armv6; +} diff --git a/libavcodec/arm/vp8dsp_init_neon.c b/libavcodec/arm/vp8dsp_init_neon.c new file mode 100644 index 0000000000..dbe5b9f961 --- /dev/null +++ b/libavcodec/arm/vp8dsp_init_neon.c @@ -0,0 +1,114 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include "libavcodec/vp8dsp.h" +#include "vp8dsp.h" + +void ff_vp8_luma_dc_wht_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); + +void ff_vp8_idct_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); +void ff_vp8_idct_dc_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); +void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride); +void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride); + +VP8_LF(neon); + +VP8_EPEL(16, neon); +VP8_EPEL(8, neon); +VP8_EPEL(4, neon); + +VP8_BILIN(16, neon); +VP8_BILIN(8, neon); +VP8_BILIN(4, neon); + +av_cold void ff_vp8dsp_init_neon(VP8DSPContext *dsp) +{ + dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_neon; + + dsp->vp8_idct_add = ff_vp8_idct_add_neon; + dsp->vp8_idct_dc_add = ff_vp8_idct_dc_add_neon; + dsp->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_neon; + dsp->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_neon; + + dsp->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_neon; + dsp->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_neon; + dsp->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_neon; + dsp->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_neon; + + dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_neon; + dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_neon; + dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_neon; + dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_neon; + + dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_neon; + dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_neon; + + dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon; + dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_neon; + dsp->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_neon; + dsp->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_neon; + + dsp->put_vp8_epel_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon; + dsp->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_neon; + dsp->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_neon; + dsp->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_neon; + dsp->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_neon; + dsp->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_neon; + dsp->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_neon; + dsp->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_neon; + dsp->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_neon; + + dsp->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_neon; + dsp->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_neon; + dsp->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_neon; + dsp->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_neon; + dsp->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_neon; + dsp->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_neon; + dsp->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_neon; + dsp->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_neon; + + dsp->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon; + dsp->put_vp8_bilinear_pixels_tab[0][0][1] = ff_put_vp8_bilin16_h_neon; + dsp->put_vp8_bilinear_pixels_tab[0][0][2] = ff_put_vp8_bilin16_h_neon; + dsp->put_vp8_bilinear_pixels_tab[0][1][0] = ff_put_vp8_bilin16_v_neon; + dsp->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilin16_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilin16_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[0][2][0] = ff_put_vp8_bilin16_v_neon; + dsp->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilin16_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilin16_hv_neon; + + dsp->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon; + dsp->put_vp8_bilinear_pixels_tab[1][0][1] = ff_put_vp8_bilin8_h_neon; + dsp->put_vp8_bilinear_pixels_tab[1][0][2] = ff_put_vp8_bilin8_h_neon; + dsp->put_vp8_bilinear_pixels_tab[1][1][0] = ff_put_vp8_bilin8_v_neon; + dsp->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilin8_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilin8_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilin8_v_neon; + dsp->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilin8_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilin8_hv_neon; + + dsp->put_vp8_bilinear_pixels_tab[2][0][1] = ff_put_vp8_bilin4_h_neon; + dsp->put_vp8_bilinear_pixels_tab[2][0][2] = ff_put_vp8_bilin4_h_neon; + dsp->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilin4_v_neon; + dsp->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilin4_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilin4_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilin4_v_neon; + dsp->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilin4_hv_neon; + dsp->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilin4_hv_neon; +} diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S index e9f5b298ce..c1a91e1461 100644 --- a/libavcodec/arm/vp8dsp_neon.S +++ b/libavcodec/arm/vp8dsp_neon.S @@ -77,18 +77,6 @@ function ff_vp8_luma_dc_wht_neon, export=1 bx lr endfunc -function ff_vp8_luma_dc_wht_dc_neon, export=1 - ldrsh r2, [r1] - mov r3, #0 - add r2, r2, #3 - strh r3, [r1] - asr r2, r2, #3 - .rept 16 - strh r2, [r0], #32 - .endr - bx lr -endfunc - function ff_vp8_idct_add_neon, export=1 vld1.16 {q0-q1}, [r1,:128] movw r3, #20091 @@ -725,23 +713,6 @@ function ff_put_vp8_pixels8_neon, export=1 bx lr endfunc -function ff_put_vp8_pixels4_neon, export=1 - ldr r12, [sp, #0] @ h - push {r4-r6,lr} -1: - subs r12, r12, #4 - ldr_post r4, r2, r3 - ldr_post r5, r2, r3 - ldr_post r6, r2, r3 - ldr_post lr, r2, r3 - str_post r4, r0, r1 - str_post r5, r0, r1 - str_post r6, r0, r1 - str_post lr, r0, r1 - bgt 1b - pop {r4-r6,pc} -endfunc - /* 4/6-tap 8th-pel MC */ .macro vp8_epel8_h6 d, a, b |