libswscale: add output support for AV_PIX_FMT_GBRAPF32

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
author: Mark Reid <mindmark@gmail.com> 2020-05-03 16:10:04 -0700
committer: Michael Niedermayer <michael@niedermayer.cc> 2020-05-05 20:06:58 +0200
commit: b4967fc71c63eae8cd96f9c46cd3e1fbd705bbf9 (patch)
tree: 3596a2ed253605b9f3f29a7b66dcc755a0c7b482 /libswscale
parent: ba5d0515a6dc0e34d578e5ef94b6ca5f17fc979d (diff)
download: ffmpeg-b4967fc71c63eae8cd96f9c46cd3e1fbd705bbf9.tar.gz
6 files changed, 177 insertions, 15 deletions
diff --git a/libswscale/output.c b/libswscale/output.c
index 68f43ffba3..e864e515d0 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2313,6 +2313,82 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter,
 }
 
 static void
+yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
+                    const int16_t **lumSrcx, int lumFilterSize,
+                    const int16_t *chrFilter, const int16_t **chrUSrcx,
+                    const int16_t **chrVSrcx, int chrFilterSize,
+                    const int16_t **alpSrcx, uint8_t **dest,
+                    int dstW, int y) /* y is not referenced in the body */
+{ /* For each of dstW pixels: sum the filter taps for Y/U/V(/A), convert to RGB with the context coefficients, and store 32-bit results in dest planes 0=G, 1=B, 2=R, 3=A. */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+    int i;
+    int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx; /* plane 3 is written only if the format has alpha AND alpha input lines were passed */
+    uint32_t **dest32 = (uint32_t**)dest; /* every output sample is one 32-bit word */
+    const int32_t **lumSrc  = (const int32_t**)lumSrcx; /* input lines are read as int32_t even though the prototype says int16_t */
+    const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
+    const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
+    const int32_t **alpSrc  = (const int32_t**)alpSrcx;
+    static const float float_mult = 1.0f / 65535.0f; /* maps a 16-bit integer value onto 0.0..1.0 */
+
+    for (i = 0; i < dstW; i++) {
+        int j;
+        int Y = -0x40000000; /* NOTE(review): accumulator bias, presumably tied to the intermediate sample format — confirm against yuv2gbrp16_full_X_c */
+        int U = -(128 << 23); /* NOTE(review): presumably removes the chroma mid-point — confirm */
+        int V = -(128 << 23);
+        int R, G, B, A; /* A is assigned and read only when hasAlpha is set */
+
+        for (j = 0; j < lumFilterSize; j++)
+            Y += lumSrc[j][i] * (unsigned)lumFilter[j]; /* unsigned multiply: wraps instead of overflowing a signed int */
+
+        for (j = 0; j < chrFilterSize; j++) {
+            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+        }
+
+        Y >>= 14;
+        Y += 0x10000;
+        U >>= 14;
+        V >>= 14;
+
+        if (hasAlpha) {
+            A = -0x40000000;
+
+            for (j = 0; j < lumFilterSize; j++)
+                A += alpSrc[j][i] * (unsigned)lumFilter[j]; /* alpha is filtered with the luma filter taps */
+
+            A >>= 1;
+            A += 0x20002000;
+        }
+
+        Y -= c->yuv2rgb_y_offset;
+        Y *= c->yuv2rgb_y_coeff;
+        Y += 1 << 13; /* NOTE(review): looks like a rounding term for the >> 14 applied at store time — confirm */
+        R = V * c->yuv2rgb_v2r_coeff;
+        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+        B =                            U * c->yuv2rgb_u2b_coeff;
+
+        R = av_clip_uintp2(Y + R, 30); /* clip to an unsigned 30-bit value; the >> 14 below leaves 16 bits */
+        G = av_clip_uintp2(Y + G, 30);
+        B = av_clip_uintp2(Y + B, 30);
+
+        dest32[0][i] = av_float2int(float_mult * (float)(G >> 14)); /* plane 0 = G, scaled by 1/65535 */
+        dest32[1][i] = av_float2int(float_mult * (float)(B >> 14)); /* plane 1 = B */
+        dest32[2][i] = av_float2int(float_mult * (float)(R >> 14)); /* plane 2 = R */
+        if (hasAlpha)
+            dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); /* plane 3 = A, clipped and scaled like the colour planes */
+    }
+    if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { /* destination byte order differs from the build's: second pass byte-swaps what was just written */
+        for (i = 0; i < dstW; i++) {
+            dest32[0][i] = av_bswap32(dest32[0][i]);
+            dest32[1][i] = av_bswap32(dest32[1][i]);
+            dest32[2][i] = av_bswap32(dest32[2][i]);
+            if (hasAlpha)
+                dest32[3][i] = av_bswap32(dest32[3][i]);
+        }
+    }
+}
+
+static void
 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
             const int16_t *ubuf[2], const int16_t *vbuf[2],
             const int16_t *abuf0, uint8_t *dest, int dstW,
@@ -2716,6 +2792,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
         case AV_PIX_FMT_GBRAP16LE:
             *yuv2anyX = yuv2gbrp16_full_X_c;
             break;
+        case AV_PIX_FMT_GBRPF32BE:
+        case AV_PIX_FMT_GBRPF32LE:
+        case AV_PIX_FMT_GBRAPF32BE:
+        case AV_PIX_FMT_GBRAPF32LE:
+            *yuv2anyX = yuv2gbrpf32_full_X_c;
+            break;
         }
         if (!*yuv2packedX && !*yuv2anyX)
             goto YUV_PACKED;
diff --git a/libswscale/slice.c b/libswscale/slice.c
index db4fa874ff..7849b70f4d 100644
--- a/libswscale/slice.c
+++ b/libswscale/slice.c
@@ -189,23 +189,26 @@ int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int src
     return 0;
 }
 
-static void fill_ones(SwsSlice *s, int n, int is16bit)
+static void fill_ones(SwsSlice *s, int n, int bpc) /* pre-fills every available line of all 4 planes with a constant whose element width depends on bpc */
 {
-    int i;
+    int i, j, k, size, end; /* end = number of elements written per line */
+
     for (i = 0; i < 4; ++i) {
-        int j;
-        int size = s->plane[i].available_lines;
+        size = s->plane[i].available_lines;
         for (j = 0; j < size; ++j) {
-            int k;
-            int end = is16bit ? n>>1: n;
-            // fill also one extra element
-            end += 1;
-            if (is16bit)
+            if (bpc == 16) { /* 16 bpc: lines hold int32_t elements */
+                end = (n>>1) + 1; /* n/2 elements, plus one extra element */
                 for (k = 0; k < end; ++k)
                     ((int32_t*)(s->plane[i].line[j]))[k] = 1<<18;
-            else
+            } else if (bpc == 32) { /* 32 bpc: lines are filled as int64_t elements */
+                end = (n>>2) + 1; /* n/4 elements, plus one extra element */
+                for (k = 0; k < end; ++k)
+                    ((int64_t*)(s->plane[i].line[j]))[k] = 1LL<<34;
+            } else { /* any other depth: lines hold int16_t elements */
+                end = n + 1; /* n elements, plus one extra element */
                 for (k = 0; k < end; ++k)
                     ((int16_t*)(s->plane[i].line[j]))[k] = 1<<14;
+            }
         }
     }
 }
@@ -272,6 +275,9 @@ int ff_init_filters(SwsContext * c)
     if (c->dstBpc == 16)
         dst_stride <<= 1;
 
+    if (c->dstBpc == 32)
+        dst_stride <<= 2;
+
     num_ydesc = need_lum_conv ? 2 : 1;
     num_cdesc = need_chr_conv ? 2 : 1;
 
@@ -302,7 +308,7 @@ int ff_init_filters(SwsContext * c)
     res = alloc_lines(&c->slice[i], dst_stride, c->dstW);
     if (res < 0) goto cleanup;
 
-    fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc == 16);
+    fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc);
 
     // vertical scaler output
     ++i;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 001cfbf15b..9cb7e8f6ac 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -500,6 +500,11 @@ static int swscale(SwsContext *c, const uint8_t *src[],
             fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
                     1, desc->comp[3].depth,
                     isBE(dstFormat));
+        } else if (is32BPS(dstFormat)) {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
+            fillPlane32(dst[3], dstStride[3], length, height, lastDstY,
+                    1, desc->comp[3].depth,
+                    isBE(dstFormat), desc->flags & AV_PIX_FMT_FLAG_FLOAT);
         } else
             fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
     }
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 9dda53eead..ee46092ff6 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -647,6 +647,13 @@ static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
     return desc->comp[0].depth == 16;
 }
 
+static av_always_inline int is32BPS(enum AVPixelFormat pix_fmt) /* returns nonzero when component 0 of pix_fmt has a depth of 32 */
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    av_assert0(desc); /* asserts that a descriptor exists for pix_fmt before it is dereferenced */
+    return desc->comp[0].depth == 32; /* only the first component is inspected */
+}
+
 static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -918,8 +925,37 @@ static inline void fillPlane16(uint8_t *plane, int stride, int width, int height
         }
         ptr += stride;
     }
+#undef FILL
 }
 
+static inline void fillPlane32(uint8_t *plane, int stride, int width, int height, int y,
+                               int alpha, int bits, const int big_endian, int is_float) /* writes one constant 32-bit value to width samples on each of height rows, starting at row y */
+{
+    int i, j;
+    uint8_t *ptr = plane + stride * y; /* first row to fill; stride is applied as a byte offset */
+    uint32_t v; /* the value stored in every sample */
+    uint32_t onef32 = 0x3f800000; /* IEEE-754 single-precision bit pattern of 1.0f */
+    if (is_float)
+        v = alpha ? onef32 : 0; /* float formats: 1.0f when alpha is nonzero, else 0.0f */
+    else
+        v = alpha ? 0xFFFFFFFF>>(32-bits) : (1U<<(bits-1)); /* integer formats: low 'bits' bits set, else only bit (bits-1); 1U avoids signed-shift UB when bits == 32 */
+
+    for (i = 0; i < height; i++) {
+#define FILL(wfunc) \
+        for (j = 0; j < width; j++) {\
+            wfunc(ptr+4*j, v);\
+        }
+        if (big_endian) { /* byte order of the store is chosen by the caller */
+            FILL(AV_WB32);
+        } else {
+            FILL(AV_WL32);
+        }
+        ptr += stride;
+    }
+#undef FILL
+}
+
+
 #define MAX_SLICE_PLANES 4
 
 /// Slice plane
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 67440cdb4a..5fb572b51a 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -491,6 +491,34 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
+static int bswap_32bpc(SwsContext *c, const uint8_t *src[],
+                              int srcStride[], int srcSliceY, int srcSliceH,
+                              uint8_t *dst[], int dstStride[]) /* copies up to 4 planes from src to dst, byte-swapping every 32-bit word; returns srcSliceH */
+{
+    int i, j, p;
+
+    for (p = 0; p < 4; p++) {
+        int srcstr = srcStride[p] / 4; /* stride in uint32_t elements (srcStride presumably in bytes — confirm) */
+        int dststr = dstStride[p] / 4;
+        uint32_t       *dstPtr =       (uint32_t *) dst[p];
+        const uint32_t *srcPtr = (const uint32_t *) src[p];
+        int min_stride         = FFMIN(FFABS(srcstr), FFABS(dststr)); /* words swapped per row: the smaller stride magnitude, not a pixel width */
+        if(!dstPtr || !srcPtr)
+            continue; /* plane missing on either side: nothing to do */
+        dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr; /* move dst to the slice's first row; srcPtr is used as passed in. NOTE(review): the shift is applied to every plane, including plane 0 — presumably 0 for the formats routed here; confirm */
+        for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) {
+            for (j = 0; j < min_stride; j++) {
+                dstPtr[j] = av_bswap32(srcPtr[j]);
+            }
+            srcPtr += srcstr;
+            dstPtr += dststr;
+        }
+    }
+
+    return srcSliceH;
+}
+
+
 static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
                            int srcSliceY, int srcSliceH, uint8_t *dst[],
                            int dstStride[])
@@ -2077,6 +2105,11 @@ void ff_get_unscaled_swscale(SwsContext *c)
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P16))
         c->swscale = bswap_16bpc;
 
+    /* bswap 32 bits per pixel/component formats */
+    if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRPF32) ||
+        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAPF32))
+        c->swscale = bswap_32bpc;
+
     if (usePal(srcFormat) && isByteRGB(dstFormat))
         c->swscale = palToRgbWrapper;
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 193efdd881..15c0a19afa 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -236,10 +236,10 @@ static const FormatEntry format_entries[] = {
     [AV_PIX_FMT_GBRP14BE]    = { 1, 1 },
     [AV_PIX_FMT_GBRP16LE]    = { 1, 1 },
     [AV_PIX_FMT_GBRP16BE]    = { 1, 1 },
-    [AV_PIX_FMT_GBRPF32LE]   = { 1, 0 },
-    [AV_PIX_FMT_GBRPF32BE]   = { 1, 0 },
-    [AV_PIX_FMT_GBRAPF32LE]  = { 1, 0 },
-    [AV_PIX_FMT_GBRAPF32BE]  = { 1, 0 },
+    [AV_PIX_FMT_GBRPF32LE]   = { 1, 1 },
+    [AV_PIX_FMT_GBRPF32BE]   = { 1, 1 },
+    [AV_PIX_FMT_GBRAPF32LE]  = { 1, 1 },
+    [AV_PIX_FMT_GBRAPF32BE]  = { 1, 1 },
     [AV_PIX_FMT_GBRAP]       = { 1, 1 },
     [AV_PIX_FMT_GBRAP16LE]   = { 1, 1 },
     [AV_PIX_FMT_GBRAP16BE]   = { 1, 1 },
author	Mark Reid <mindmark@gmail.com>	2020-05-03 16:10:04 -0700
committer	Michael Niedermayer <michael@niedermayer.cc>	2020-05-05 20:06:58 +0200
commit	b4967fc71c63eae8cd96f9c46cd3e1fbd705bbf9 (patch)
tree	3596a2ed253605b9f3f29a7b66dcc755a0c7b482 /libswscale
parent	ba5d0515a6dc0e34d578e5ef94b6ca5f17fc979d (diff)
download	ffmpeg-b4967fc71c63eae8cd96f9c46cd3e1fbd705bbf9.tar.gz