diff options
author | Matthieu Bouron <matthieu.bouron@stupeflix.com> | 2015-11-05 14:49:57 +0100 |
---|---|---|
committer | Clément Bœsch <clement@stupeflix.com> | 2015-11-23 14:47:12 +0100 |
commit | 46feb66972bb9e872aebf15b9fe6ee9075e2615a (patch) | |
tree | b5ce41fca4aeaf59686747b3d660e851bdc9b767 /libswscale/arm/swscale_unscaled.c | |
parent | e9ec28c95ef68543648073bb9e1531bfc7c85048 (diff) | |
download | ffmpeg-46feb66972bb9e872aebf15b9fe6ee9075e2615a.tar.gz |
swscale/arm: add ff_nv{12,21}_to_{argb,rgba,abgr,bgra}_neon
Signed-off-by: Matthieu Bouron <matthieu.bouron@stupeflix.com>
Signed-off-by: Clément Bœsch <clement@stupeflix.com>
Diffstat (limited to 'libswscale/arm/swscale_unscaled.c')
-rw-r--r-- | libswscale/arm/swscale_unscaled.c | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/libswscale/arm/swscale_unscaled.c b/libswscale/arm/swscale_unscaled.c index 04be7622bc..5d0c8e9e4c 100644 --- a/libswscale/arm/swscale_unscaled.c +++ b/libswscale/arm/swscale_unscaled.c @@ -23,6 +23,7 @@ #include "libswscale/swscale_internal.h" #include "libavutil/arm/cpu.h" +#if 0 extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma, int width, int height, int y_stride, int c_stride, int src_stride, @@ -60,8 +61,69 @@ static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[ return 0; } +#endif + +#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) \ +int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \ + uint8_t *dst, int linesize, \ + const uint8_t *srcY, int linesizeY, \ + const uint8_t *srcC, int linesizeC, \ + const int16_t *table, \ + int y_offset, \ + int y_coeff); \ + \ +static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \ + int srcStride[], int srcSliceY, int srcSliceH, \ + uint8_t *dst[], int dstStride[]) { \ + const int16_t yuv2rgb_table[] = { \ + c->yuv2rgb_v2r_coeff, \ + c->yuv2rgb_u2g_coeff, \ + c->yuv2rgb_v2g_coeff, \ + c->yuv2rgb_u2b_coeff, \ + }; \ + \ + ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \ + dst[0] + srcSliceY * dstStride[0], dstStride[0], \ + src[0] + srcSliceY * srcStride[0], srcStride[0], \ + src[1] + (srcSliceY / 2) * srcStride[1], srcStride[1], \ + yuv2rgb_table, \ + c->yuv2rgb_y_offset >> 9, \ + c->yuv2rgb_y_coeff); \ + \ + return 0; \ +} + +#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx) \ +DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb) \ +DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba) \ +DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr) \ +DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra) \ + +DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv12) +DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv21) + +/* We need a 16 pixel width alignment. This constraint can easily be removed + * for input reading but for the output which is 4-bytes per pixel (RGBA) the + * assembly might be writing as much as 4*15=60 extra bytes at the end of the + * line, which won't fit the 32-bytes buffer alignment. */ +#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT) do { \ + if (c->srcFormat == AV_PIX_FMT_##IFMT \ + && c->dstFormat == AV_PIX_FMT_##OFMT \ + && !(c->srcH & 1) \ + && !(c->srcW & 15)) { \ + c->swscale = ifmt##_to_##ofmt##_neon_wrapper; \ + } \ +} while (0) + +#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX) do { \ + SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB); \ + SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA); \ + SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR); \ + SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA); \ +} while (0) static void get_unscaled_swscale_neon(SwsContext *c) { +#if 0 int accurate_rnd = c->flags & SWS_ACCURATE_RND; if (c->srcFormat == AV_PIX_FMT_RGBA && c->dstFormat == AV_PIX_FMT_NV12 @@ -69,6 +131,10 @@ static void get_unscaled_swscale_neon(SwsContext *c) { c->swscale = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper : rgbx_to_nv12_neon_16_wrapper; } +#endif + + SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12); + SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21); } void ff_get_unscaled_swscale_arm(SwsContext *c) |