aboutsummaryrefslogtreecommitdiffstats
path: root/libavfilter/x86
diff options
context:
space:
mode:
author: Thomas Mundt <tmundt75@gmail.com> 2017-08-30 03:37:18 +0200
committer: Michael Niedermayer <michael@niedermayer.cc> 2017-09-15 22:40:21 +0200
commit: a7f6bfdc185a04a703bedd712ee306435372af12 (patch)
tree: 6acb620c777348c07a8fdd6250e23bf082c6e4ed /libavfilter/x86
parent: 1a85fb7e1eb375b37ee9863ce8e6e7ada1742dbe (diff)
download: ffmpeg-a7f6bfdc185a04a703bedd712ee306435372af12.tar.gz
avfilter/interlace: prevent over-sharpening with the complex low-pass filter
The complex vertical low-pass filter slightly over-sharpens the picture. This becomes visible when several transcodings are cascaded and the error compounds, e.g. over several generations of HD->SD, SD->HD conversion.

To prevent this behaviour, the destination pixel must not exceed the source pixel when the average of the pixels above and below is less than the source pixel. Conversely, the destination pixel must not fall below the source pixel when that average is greater than the source pixel.

Tested and approved in a visual transcoding cascade test by video professionals.

SSIM/PSNR test with the first generation of an HD->SD file as a reference against the 6th generation (3 x SD->HD, HD->SD):

Results without the patch:
SSIM Y:0.956508 (13.615881) U:0.991601 (20.757750) V:0.993004 (21.551382) All:0.974405 (15.918463)
PSNR y:31.838009 u:48.424280 v:48.962711 average:34.759466 min:31.699297 max:40.857847

Results with the patch:
SSIM Y:0.970051 (15.236232) U:0.991883 (20.905857) V:0.993174 (21.658049) All:0.981290 (17.279202)
PSNR y:34.412108 u:48.504454 v:48.969496 average:37.264644 min:34.310637 max:42.373392

Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- libavfilter/x86/vf_interlace.asm 55
1 files changed, 33 insertions, 22 deletions
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index c601fd7bf4..d0fffd293b 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -63,41 +63,46 @@ REP_RET
%endmacro
%macro LOWPASS_LINE_COMPLEX 0
-cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref
- pxor m6, m6
+cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
+ pxor m7, m7
.loop:
mova m0, [srcq+mrefq]
mova m2, [srcq+prefq]
mova m1, m0
mova m3, m2
- punpcklbw m0, m6
- punpcklbw m2, m6
- punpckhbw m1, m6
- punpckhbw m3, m6
+ punpcklbw m0, m7
+ punpcklbw m2, m7
+ punpckhbw m1, m7
+ punpckhbw m3, m7
paddw m0, m2
paddw m1, m3
+ mova m6, m0
+ mova m5, m1
+ mova m2, [srcq]
+ mova m3, m2
+ punpcklbw m2, m7
+ punpckhbw m3, m7
+ paddw m0, m2
+ paddw m1, m3
+ psllw m2, 1
+ psllw m3, 1
+ paddw m0, m2
+ paddw m1, m3
+ psllw m0, 1
+ psllw m1, 1
+ pcmpgtw m6, m2
+ pcmpgtw m5, m3
+ packsswb m6, m5
mova m2, [srcq+mrefq*2]
mova m4, [srcq+prefq*2]
mova m3, m2
mova m5, m4
- punpcklbw m2, m6
- punpcklbw m4, m6
- punpckhbw m3, m6
- punpckhbw m5, m6
+ punpcklbw m2, m7
+ punpcklbw m4, m7
+ punpckhbw m3, m7
+ punpckhbw m5, m7
paddw m2, m4
paddw m3, m5
- mova m4, [srcq]
- mova m5, m4
- punpcklbw m4, m6
- punpckhbw m5, m6
- paddw m0, m4
- paddw m1, m5
- psllw m0, 1
- psllw m1, 1
- psllw m4, 2
- psllw m5, 2
- paddw m0, m4
- paddw m1, m5
paddw m0, [pw_4]
paddw m1, [pw_4]
psubusw m0, m2
@@ -105,6 +110,12 @@ cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref
psrlw m0, 3
psrlw m1, 3
packuswb m0, m1
+ mova m1, m0
+ pmaxub m0, [srcq]
+ pminub m1, [srcq]
+ pand m0, m6
+ pandn m6, m1
+ por m0, m6
mova [dstq], m0
add dstq, mmsize