1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
/*
*
* Original MPlayer filters by Richard Felker.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavfilter/vf_eq.h"
#if HAVE_MMX_INLINE && HAVE_6REGS
static void process_MMX(EQParameters *param, uint8_t *dst, int dst_stride,
uint8_t *src, int src_stride, int w, int h)
{
int i;
int pel;
int dstep = dst_stride - w;
int sstep = src_stride - w;
short brvec[4];
short contvec[4];
int contrast = (int) (param->contrast * 256 * 16);
int brightness = ((int) (100.0 * param->brightness + 100.0) * 511) / 200 - 128 - contrast / 32;
brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness;
contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;
while (h--) {
__asm__ volatile (
"movq (%5), %%mm3 \n\t"
"movq (%6), %%mm4 \n\t"
"pxor %%mm0, %%mm0 \n\t"
"movl %4, %%eax \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movq (%0), %%mm1 \n\t"
"movq (%0), %%mm2 \n\t"
"punpcklbw %%mm0, %%mm1\n\t"
"punpckhbw %%mm0, %%mm2\n\t"
"psllw $4, %%mm1 \n\t"
"psllw $4, %%mm2 \n\t"
"pmulhw %%mm4, %%mm1 \n\t"
"pmulhw %%mm4, %%mm2 \n\t"
"paddw %%mm3, %%mm1 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"packuswb %%mm2, %%mm1 \n\t"
"add $8, %0 \n\t"
"movq %%mm1, (%1) \n\t"
"add $8, %1 \n\t"
"decl %%eax \n\t"
"jnz 1b \n\t"
: "=r" (src), "=r" (dst)
: "0" (src), "1" (dst), "r" (w>>3), "r" (brvec), "r" (contvec)
: "%eax"
);
for (i = w&7; i; i--) {
pel = ((*src++ * contrast) >> 12) + brightness;
if (pel & 768)
pel = (-pel) >> 31;
*dst++ = pel;
}
src += sstep;
dst += dstep;
}
__asm__ volatile ( "emms \n\t" ::: "memory" );
}
#endif
av_cold void ff_eq_init_x86(EQContext *eq)
{
#if HAVE_MMX_INLINE && HAVE_6REGS
int cpu_flags = av_get_cpu_flags();
if (cpu_flags & AV_CPU_FLAG_MMX) {
eq->process = process_MMX;
}
#endif
}
|