aboutsummaryrefslogtreecommitdiffstats
path: root/tests/checkasm
diff options
context:
space:
mode:
authorAlan Kelly <alankelly@google.com>2021-12-15 10:35:02 +0100
committerJames Almer <jamrial@gmail.com>2021-12-15 20:04:59 -0300
commitf900a19fa94b1a55b660ec2e5c13419d59754bc0 (patch)
tree7fedf4f6b884fd7172a666f1706653b950fc6a90 /tests/checkasm
parente9ba40c5c9a49bc97d16d66c46ff993fa84a6c31 (diff)
downloadffmpeg-f900a19fa94b1a55b660ec2e5c13419d59754bc0.tar.gz
libswscale: Adds ff_hscale8to15_4_avx2 and ff_hscale8to15_X4_avx2 for all filter sizes.
Includes fixes so that FATE passes on 64-bit Windows. These functions replace all ff_hscale8to15_*_ssse3 functions when AVX2 is available. Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'tests/checkasm')
-rw-r--r--tests/checkasm/sw_scale.c20
1 files changed, 15 insertions, 5 deletions
diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
index 1e7ffe0fff..011cb46428 100644
--- a/tests/checkasm/sw_scale.c
+++ b/tests/checkasm/sw_scale.c
@@ -134,13 +134,13 @@ static void check_yuv2yuvX(void)
}
#undef SRC_PIXELS
-#define SRC_PIXELS 128
+#define SRC_PIXELS 512
static void check_hscale(void)
{
#define MAX_FILTER_WIDTH 40
-#define FILTER_SIZES 5
- static const int filter_sizes[FILTER_SIZES] = { 4, 8, 16, 32, 40 };
+#define FILTER_SIZES 6
+ static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
#define HSCALE_PAIRS 2
static const int hscale_pairs[HSCALE_PAIRS][2] = {
@@ -159,6 +159,8 @@ static void check_hscale(void)
// padded
LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
+ LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
+ LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);
// The dst parameter here is either int16_t or int32_t but we use void* to
// just cover both cases.
@@ -166,6 +168,8 @@ static void check_hscale(void)
const uint8_t *src, const int16_t *filter,
const int32_t *filterPos, int filterSize);
+ int cpu_flags = av_get_cpu_flags();
+
ctx = sws_alloc_context();
if (sws_init_context(ctx, NULL, NULL) < 0)
fail();
@@ -179,9 +183,11 @@ static void check_hscale(void)
ctx->srcBpc = hscale_pairs[hpi][0];
ctx->dstBpc = hscale_pairs[hpi][1];
ctx->hLumFilterSize = ctx->hChrFilterSize = width;
+ ctx->dstW = ctx->chrDstW = SRC_PIXELS;
for (i = 0; i < SRC_PIXELS; i++) {
filterPos[i] = i;
+ filterPosAvx[i] = i;
// These filter cofficients are chosen to try break two corner
// cases, namely:
@@ -210,16 +216,20 @@ static void check_hscale(void)
filter[SRC_PIXELS * width + i] = rnd();
}
ff_sws_init_scale(ctx);
+ memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
+ if (cpu_flags & AV_CPU_FLAG_AVX2){
+ ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
+ }
if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
- call_new(NULL, dst1, SRC_PIXELS, src, filter, filterPos, width);
+ call_new(NULL, dst1, SRC_PIXELS, src, filterAvx2, filterPosAvx, width);
if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0])))
fail();
- bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
+ bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPosAvx, width);
}
}
}