swscale/range_convert: fix mpeg ranges in yuv range conversion for non-8-bit pixel formats

There is an issue with the constants used in YUV to YUV range conversion, where the upper bound is not respected when converting to mpeg range.

With this commit, the constants are calculated at runtime, depending on the bit depth. This approach also allows us to more easily understand how the constants are derived.

For bit depths <= 14, the number of fixed point bits has been set to 14 for all conversions, to simplify the code. For bit depths > 14, the number of fixed point bits has been raised and set to 18, to allow for the conversion to be accurate enough for the mpeg range to be respected.

The convert functions now take the conversion constants (coeff and offset) as function arguments. For bit depths <= 14, coeff is unsigned 16-bit and offset is 32-bit. For bit depths > 14, coeff is unsigned 32-bit and offset is 64-bit.

x86_64:
chrRangeFromJpeg8_1920_c:    2127.4    2125.0  (1.00x)
chrRangeFromJpeg16_1920_c:   2325.2    2127.2  (1.09x)
chrRangeToJpeg8_1920_c:      3166.9    3168.7  (1.00x)
chrRangeToJpeg16_1920_c:     2152.4    3164.8  (0.68x)
lumRangeFromJpeg8_1920_c:    1263.0    1302.5  (0.97x)
lumRangeFromJpeg16_1920_c:   1080.5    1299.2  (0.83x)
lumRangeToJpeg8_1920_c:      1886.8    2112.2  (0.89x)
lumRangeToJpeg16_1920_c:     1077.0    1906.5  (0.56x)

aarch64 A55:
chrRangeFromJpeg8_1920_c:   28835.2   28835.6  (1.00x)
chrRangeFromJpeg16_1920_c:  28839.8   32680.8  (0.88x)
chrRangeToJpeg8_1920_c:     23074.7   23075.4  (1.00x)
chrRangeToJpeg16_1920_c:    17318.9   24996.0  (0.69x)
lumRangeFromJpeg8_1920_c:   15389.7   15384.5  (1.00x)
lumRangeFromJpeg16_1920_c:  15388.2   17306.7  (0.89x)
lumRangeToJpeg8_1920_c:     19227.8   19226.6  (1.00x)
lumRangeToJpeg16_1920_c:    15387.0   21146.3  (0.73x)

aarch64 A76:
chrRangeFromJpeg8_1920_c:    6324.4    6268.1  (1.01x)
chrRangeFromJpeg16_1920_c:   6339.9   11521.5  (0.55x)
chrRangeToJpeg8_1920_c:      9656.0    9612.8  (1.00x)
chrRangeToJpeg16_1920_c:     6340.4   11651.8  (0.54x)
lumRangeFromJpeg8_1920_c:    4422.0    4420.8  (1.00x)
lumRangeFromJpeg16_1920_c:   4420.9    5762.0  (0.77x)
lumRangeToJpeg8_1920_c:      5949.1    5977.5  (1.00x)
lumRangeToJpeg16_1920_c:     4446.8    5946.2  (0.75x)

NOTE: all simd optimizations for range_convert have been disabled. They will be re-enabled when they are fixed for each architecture.

NOTE2: the same issue still exists in rgb2yuv conversions, which is not addressed in this commit.
author: Ramiro Polla <ramiro.polla@gmail.com> 2024-09-18 23:46:06 +0200
committer: Ramiro Polla <ramiro.polla@gmail.com> 2024-12-05 21:10:29 +0100
commit: 384fe39623e932e68fe35af7d5b51fcd0a6c28fb (patch)
tree: 52ae1ce0e6705559739be743975a2ee7024137ed /tests/checkasm
parent: 58bcdeb7425ed7b74f1aac20099cb3c025e6ce8d (diff)
download: ffmpeg-384fe39623e932e68fe35af7d5b51fcd0a6c28fb.tar.gz
1 files changed, 60 insertions, 8 deletions
diff --git a/tests/checkasm/sw_range_convert.c b/tests/checkasm/sw_range_convert.c
index ba576ff08c..3246a3170f 100644
--- a/tests/checkasm/sw_range_convert.c
+++ b/tests/checkasm/sw_range_convert.c
@@ -68,7 +68,8 @@ static void check_lumConvertRange(int from)
     int32_t *dst0_32 = (int32_t *) dst0;
     int32_t *dst1_32 = (int32_t *) dst1;
 
-    declare_func(void, int16_t *dst, int width);
+    declare_func(void, int16_t *dst, int width,
+                       uint32_t coeff, int64_t offset);
 
     sws = sws_alloc_context();
     if (sws_init_context(sws, NULL, NULL) < 0)
@@ -83,6 +84,10 @@ static void check_lumConvertRange(int from)
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
         int bit_depth = desc->comp[0].depth;
         int sample_size = bit_depth == 16 ? sizeof(int32_t) : sizeof(int16_t);
+        int src_shift = bit_depth <= 14 ? 15 - bit_depth : 19 - bit_depth;
+        int mpeg_min = 16 << (bit_depth - 8);
+        int mpeg_max = 235 << (bit_depth - 8);
+        int jpeg_max = (1 << bit_depth) - 1;
         sws->src_format = pix_fmt;
         sws->dst_format = pix_fmt;
         c->dstBpc = bit_depth;
@@ -92,16 +97,37 @@ static void check_lumConvertRange(int from)
             if (check_func(c->lumConvertRange, "%s%d_%d", func_str, bit_depth, width)) {
                 randomize_buffers(dst0, dst1, bit_depth, width);
                 if (bit_depth == 16) {
+                    if (!from) {
+                        dst1_32[0] = dst0_32[0] = mpeg_min << src_shift;
+                        dst1_32[1] = dst0_32[1] = mpeg_max << src_shift;
+                    }
                     dst1_32[2] = dst0_32[2] = -1;
                 } else {
+                    if (!from) {
+                        dst1[0] = dst0[0] = mpeg_min << src_shift;
+                        dst1[1] = dst0[1] = mpeg_max << src_shift;
+                    }
                     dst1[2] = dst0[2] = -1;
                 }
-                call_ref(dst0, width);
-                call_new(dst1, width);
+                call_ref(dst0, width,
+                         c->lumConvertRange_coeff, c->lumConvertRange_offset);
+                call_new(dst1, width,
+                         c->lumConvertRange_coeff, c->lumConvertRange_offset);
                 if (memcmp(dst0, dst1, width * sample_size))
                     fail();
+                if (!from) {
+                    /* check that the mpeg range is respected */
+                    if (bit_depth == 16) {
+                        if ((dst1_32[0] >> src_shift) > 0 || (dst1_32[1] >> src_shift) != jpeg_max)
+                            fail();
+                    } else {
+                        if ((dst1[0] >> src_shift) > 0 || (dst1[1] >> src_shift) != jpeg_max)
+                            fail();
+                    }
+                }
                 if (width == LARGEST_INPUT_SIZE && (bit_depth == 8 || bit_depth == 16))
-                    bench_new(dst1, width);
+                    bench_new(dst1, width,
+                              c->lumConvertRange_coeff, c->lumConvertRange_offset);
             }
         }
     }
@@ -125,7 +151,8 @@ static void check_chrConvertRange(int from)
     int32_t *dstU0_32 = (int32_t *) dstU0;
     int32_t *dstU1_32 = (int32_t *) dstU1;
 
-    declare_func(void, int16_t *dstU, int16_t *dstV, int width);
+    declare_func(void, int16_t *dstU, int16_t *dstV, int width,
+                       uint32_t coeff, int64_t offset);
 
     sws = sws_alloc_context();
     if (sws_init_context(sws, NULL, NULL) < 0)
@@ -140,6 +167,10 @@ static void check_chrConvertRange(int from)
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
         int bit_depth = desc->comp[0].depth;
         int sample_size = bit_depth == 16 ? sizeof(int32_t) : sizeof(int16_t);
+        int src_shift = bit_depth <= 14 ? 15 - bit_depth : 19 - bit_depth;
+        int mpeg_min = 16 << (bit_depth - 8);
+        int mpeg_max = 240 << (bit_depth - 8);
+        int jpeg_max = (1 << bit_depth) - 1;
         sws->src_format = pix_fmt;
         sws->dst_format = pix_fmt;
         c->dstBpc = bit_depth;
@@ -150,17 +181,38 @@ static void check_chrConvertRange(int from)
                 randomize_buffers(dstU0, dstU1, bit_depth, width);
                 randomize_buffers(dstV0, dstV1, bit_depth, width);
                 if (bit_depth == 16) {
+                    if (!from) {
+                        dstU1_32[0] = dstU0_32[0] = mpeg_min << src_shift;
+                        dstU1_32[1] = dstU0_32[1] = mpeg_max << src_shift;
+                    }
                     dstU1_32[2] = dstU0_32[2] = -1;
                 } else {
+                    if (!from) {
+                        dstU1[0] = dstU0[0] = mpeg_min << src_shift;
+                        dstU1[1] = dstU0[1] = mpeg_max << src_shift;
+                    }
                     dstU1[2] = dstU0[2] = -1;
                 }
-                call_ref(dstU0, dstV0, width);
-                call_new(dstU1, dstV1, width);
+                call_ref(dstU0, dstV0, width,
+                         c->chrConvertRange_coeff, c->chrConvertRange_offset);
+                call_new(dstU1, dstV1, width,
+                         c->chrConvertRange_coeff, c->chrConvertRange_offset);
                 if (memcmp(dstU0, dstU1, width * sample_size) ||
                     memcmp(dstV0, dstV1, width * sample_size))
                     fail();
+                if (!from) {
+                    /* check that the mpeg range is respected */
+                    if (bit_depth == 16) {
+                        if ((dstU1_32[0] >> src_shift) > 0 || (dstU1_32[1] >> src_shift) != jpeg_max)
+                            fail();
+                    } else {
+                        if ((dstU1[0] >> src_shift) > 0 || (dstU1[1] >> src_shift) != jpeg_max)
+                            fail();
+                    }
+                }
                 if (width == LARGEST_INPUT_SIZE && (bit_depth == 8 || bit_depth == 16))
-                    bench_new(dstU1, dstV1, width);
+                    bench_new(dstU1, dstV1, width,
+                              c->chrConvertRange_coeff, c->chrConvertRange_offset);
             }
         }
     }
author	Ramiro Polla <ramiro.polla@gmail.com>	2024-09-18 23:46:06 +0200
committer	Ramiro Polla <ramiro.polla@gmail.com>	2024-12-05 21:10:29 +0100
commit	384fe39623e932e68fe35af7d5b51fcd0a6c28fb (patch)
tree	52ae1ce0e6705559739be743975a2ee7024137ed /tests/checkasm
parent	58bcdeb7425ed7b74f1aac20099cb3c025e6ce8d (diff)
download	ffmpeg-384fe39623e932e68fe35af7d5b51fcd0a6c28fb.tar.gz