about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Paul B Mahol <onemda@gmail.com>, 2022-03-02 22:30:40 +0100
committer: Paul B Mahol <onemda@gmail.com>, 2022-03-03 09:57:53 +0100
commit: dae95b3ffd62ed86cd2e3798c2f281aa67969eca (patch)
tree: a83c2dfe6a1c3f271406a7497ad053f8bfe59bfb
parent: 59520f068da89006d527f044a6560235260bcc6c (diff)
download: ffmpeg-dae95b3ffd62ed86cd2e3798c2f281aa67969eca.tar.gz
avfilter/vf_maskedmerge: fix rounding when masking
-rw-r--r-- libavfilter/maskedmerge.h | 2
-rw-r--r-- libavfilter/vf_maskedmerge.c | 20
-rw-r--r-- libavfilter/x86/vf_maskedmerge.asm | 17
3 files changed, 23 insertions, 16 deletions
diff --git a/libavfilter/maskedmerge.h b/libavfilter/maskedmerge.h
index 8e2b1cf676..c1cf8027e4 100644
--- a/libavfilter/maskedmerge.h
+++ b/libavfilter/maskedmerge.h
@@ -30,7 +30,7 @@ typedef struct MaskedMergeContext {
int linesize[4];
int nb_planes;
int planes;
- int half, depth;
+ int half, depth, max;
FFFrameSync fs;
void (*maskedmerge)(const uint8_t *bsrc, const uint8_t *osrc,
diff --git a/libavfilter/vf_maskedmerge.c b/libavfilter/vf_maskedmerge.c
index 11492af61f..db0c516938 100644
--- a/libavfilter/vf_maskedmerge.c
+++ b/libavfilter/vf_maskedmerge.c
@@ -96,7 +96,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
base->linesize[p], overlay->linesize[p],
mask->linesize[p], out->linesize[p],
s->width[p], slice_end - slice_start,
- s->half, s->depth);
+ s->half, s->max);
}
return 0;
@@ -138,13 +138,13 @@ static int process_frame(FFFrameSync *fs)
return ff_filter_frame(outlink, out);
}
-#define MASKEDMERGE(n, type, half, shift) \
+#define MASKEDMERGE(n, type, ctype, half, max, div) \
static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
const uint8_t *mmsrc, uint8_t *ddst, \
ptrdiff_t blinesize, ptrdiff_t olinesize, \
ptrdiff_t mlinesize, ptrdiff_t dlinesize, \
int w, int h, \
- int hhalf, int sshift) \
+ int hhalf, int mmax) \
{ \
const type *bsrc = (const type *)bbsrc; \
const type *osrc = (const type *)oosrc; \
@@ -158,7 +158,10 @@ static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
\
for (int y = 0; y < h; y++) { \
for (int x = 0; x < w; x++) { \
- dst[x] = bsrc[x] + ((msrc[x] * (osrc[x] - bsrc[x]) + half) shift); \
+ const type invm = max - msrc[x]; \
+ const ctype r = ((ctype)(bsrc[x] * invm) + \
+ (ctype)(msrc[x] * osrc[x] + half)) div; \
+ dst[x] = r; \
} \
\
dst += dlinesize; \
@@ -168,9 +171,9 @@ static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
} \
}
-MASKEDMERGE(8, uint8_t, 128, >> 8)
-MASKEDMERGE(16, uint16_t, hhalf, >> sshift)
-MASKEDMERGE(32, float, 0.f, + 0.f)
+MASKEDMERGE(8, uint8_t, uint16_t, 127, 255, / 255)
+MASKEDMERGE(16, uint16_t, uint32_t, hhalf, mmax, / mmax)
+MASKEDMERGE(32, float, float, 0.f, 1.f, + 0.f)
static int config_input(AVFilterLink *inlink)
{
@@ -189,7 +192,8 @@ static int config_input(AVFilterLink *inlink)
s->width[0] = s->width[3] = inlink->w;
s->depth = desc->comp[0].depth;
- s->half = (1 << s->depth) / 2;
+ s->max = (1 << s->depth) - 1;
+ s->half = s->max / 2;
if (s->depth == 8)
s->maskedmerge = maskedmerge8;
diff --git a/libavfilter/x86/vf_maskedmerge.asm b/libavfilter/x86/vf_maskedmerge.asm
index 7e61935b97..1028299087 100644
--- a/libavfilter/x86/vf_maskedmerge.asm
+++ b/libavfilter/x86/vf_maskedmerge.asm
@@ -24,26 +24,28 @@
SECTION_RODATA
-pw_128: times 8 dw 128
-pw_256: times 8 dw 256
+pw_127: times 8 dw 127
+pw_255: times 8 dw 255
+pw_32897: times 8 dw 32897
SECTION .text
INIT_XMM sse2
%if ARCH_X86_64
-cglobal maskedmerge8, 8, 11, 7, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
+cglobal maskedmerge8, 8, 11, 8, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
mov wd, dword wm
mov hd, dword hm
%else
-cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
+cglobal maskedmerge8, 5, 7, 8, bsrc, osrc, msrc, dst, blinesize, w, x
mov wd, r8m
%define olinesizeq r5mp
%define mlinesizeq r6mp
%define dlinesizeq r7mp
%define hd r9mp
%endif
- mova m4, [pw_256]
- mova m5, [pw_128]
+ mova m4, [pw_255]
+ mova m5, [pw_127]
+ mova m7, [pw_32897]
pxor m6, m6
add bsrcq, wq
add osrcq, wq
@@ -66,7 +68,8 @@ cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
pmullw m1, m3
paddw m1, m2
paddw m1, m5
- psrlw m1, 8
+ pmulhuw m1, m7
+ psrlw m1, 7
packuswb m1, m1
movh [dstq + xq], m1
add xq, mmsize / 2