diff options
author | Alan Kelly <alankelly@google.com> | 2021-12-15 10:35:02 +0100 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2021-12-15 20:04:59 -0300 |
commit | f900a19fa94b1a55b660ec2e5c13419d59754bc0 (patch) | |
tree | 7fedf4f6b884fd7172a666f1706653b950fc6a90 /tests/checkasm | |
parent | e9ba40c5c9a49bc97d16d66c46ff993fa84a6c31 (diff) | |
download | ffmpeg-f900a19fa94b1a55b660ec2e5c13419d59754bc0.tar.gz |
libswscale: Adds ff_hscale8to15_4_avx2 and ff_hscale8to15_X4_avx2 for all filter sizes.
This also fixes the code so that FATE passes on 64-bit Windows.
These functions replace all ff_hscale8to15_*_ssse3 functions when AVX2 is available.
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'tests/checkasm')
-rw-r--r-- | tests/checkasm/sw_scale.c | 20 |
1 files changed, 15 insertions, 5 deletions
diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c index 1e7ffe0fff..011cb46428 100644 --- a/tests/checkasm/sw_scale.c +++ b/tests/checkasm/sw_scale.c @@ -134,13 +134,13 @@ static void check_yuv2yuvX(void) } #undef SRC_PIXELS -#define SRC_PIXELS 128 +#define SRC_PIXELS 512 static void check_hscale(void) { #define MAX_FILTER_WIDTH 40 -#define FILTER_SIZES 5 - static const int filter_sizes[FILTER_SIZES] = { 4, 8, 16, 32, 40 }; +#define FILTER_SIZES 6 + static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 }; #define HSCALE_PAIRS 2 static const int hscale_pairs[HSCALE_PAIRS][2] = { @@ -159,6 +159,8 @@ static void check_hscale(void) // padded LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]); LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]); + LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]); + LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]); // The dst parameter here is either int16_t or int32_t but we use void* to // just cover both cases. 
@@ -166,6 +168,8 @@ static void check_hscale(void) const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); + int cpu_flags = av_get_cpu_flags(); + ctx = sws_alloc_context(); if (sws_init_context(ctx, NULL, NULL) < 0) fail(); @@ -179,9 +183,11 @@ static void check_hscale(void) ctx->srcBpc = hscale_pairs[hpi][0]; ctx->dstBpc = hscale_pairs[hpi][1]; ctx->hLumFilterSize = ctx->hChrFilterSize = width; + ctx->dstW = ctx->chrDstW = SRC_PIXELS; for (i = 0; i < SRC_PIXELS; i++) { filterPos[i] = i; + filterPosAvx[i] = i; // These filter cofficients are chosen to try break two corner // cases, namely: @@ -210,16 +216,20 @@ static void check_hscale(void) filter[SRC_PIXELS * width + i] = rnd(); } ff_sws_init_scale(ctx); + memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH)); + if (cpu_flags & AV_CPU_FLAG_AVX2){ + ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS); + } if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) { memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0])); memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0])); call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width); - call_new(NULL, dst1, SRC_PIXELS, src, filter, filterPos, width); + call_new(NULL, dst1, SRC_PIXELS, src, filterAvx2, filterPosAvx, width); if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0]))) fail(); - bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width); + bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPosAvx, width); } } } |