1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
;******************************************************************************
;* SIMD-optimized HuffYUV functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION .text
%include "libavcodec/x86/huffyuvdsp_template.asm"
;------------------------------------------------------------------------------
; void (*add_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
;
; ADD_INT16 emits one add_int16 entry point for whichever instruction set is
; selected by the preceding INIT_XMM / INIT_YMM (see the instantiations after
; the macro). The entry point only picks between an aligned and an unaligned
; variant of INT16_LOOP; the arithmetic itself lives in INT16_LOOP, which comes
; from huffyuvdsp_template.asm and is not visible in this file.
; NOTE(review): presumably dst[i] = (dst[i] + src[i]) & mask for w elements --
; confirm against the template.
;------------------------------------------------------------------------------
%macro ADD_INT16 0
; cglobal: 4 arguments, 4 general-purpose registers, 5 vector registers.
; "tmp" is a fifth register NAME only: with 4 GPRs declared it is neither
; loaded nor reserved here. NOTE(review): INT16_LOOP presumably saves and
; restores it itself if it uses it -- confirm in the template.
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
; Take the aligned path only when BOTH pointers are multiples of the vector
; width (mmsize bytes); any low address bit set on either one selects the
; unaligned path.
test srcq, mmsize-1
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
; Aligned variant ("a"), addition flavour.
; NOTE(review): there is no return between this expansion and .unaligned, so
; INT16_LOOP is expected to end with its own return -- verify in the template.
INT16_LOOP a, add
.unaligned:
; Unaligned variant ("u"), addition flavour.
INT16_LOOP u, add
%endmacro
; SSE2 build of add_int16 (XMM registers).
INIT_XMM sse2
ADD_INT16
; AVX2 build of add_int16 (YMM registers), only when the build enables
; external AVX2 assembly.
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
ADD_INT16
%endif
;------------------------------------------------------------------------------
; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
; intptr_t w, uint8_t *left)
;
; Left prediction for 4-byte pixels: every output pixel is the byte-wise
; (wrapping, paddb) sum of the corresponding src pixel and the previous output
; pixel. The first pixel uses the 4 bytes at *left as its predecessor, and the
; last output pixel is written back to *left on exit.
;
; One loop iteration handles mmsize bytes (4 pixels with XMM registers).
; NOTE(review): the loop is bottom-tested and steps by mmsize, so it always
; runs at least once and touches whole vectors; callers presumably guarantee
; w > 0 and enough padding when w*4 is not a multiple of mmsize -- confirm.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
; w: pixel count -> byte count (4 bytes per pixel).
shl wq, 2
; m0 = the 4 "left" bytes in the lowest dword.
movd m0, [leftq]
; Point dst/src at the END of the row and index with a negative offset that
; counts up to zero, so a single add+jl both advances and terminates the loop.
lea dstq, [dstq + wq]
lea srcq, [srcq + wq]
; Move the left pixel to the top dword, where the loop expects the previously
; produced pixel (LSHIFT comes from x86util.asm; used here as a shift by
; mmsize-4 bytes).
LSHIFT m0, mmsize-4
neg wq
.loop:
; m1 = next mmsize source bytes (unaligned load).
movu m1, [srcq+wq]
; Prefix sum across the pixels of the vector, step 1:
; each pixel += the pixel one position (4 bytes) below it.
mova m2, m1
LSHIFT m1, 4
paddb m1, m2
; Broadcast the last pixel of the previous result (top dword of m0) to every
; dword; this is the carry-in from the preceding pixels.
pshufd m0, m0, q3333
; Prefix sum step 2: each pixel += the partial sum two positions (8 bytes)
; below it; m1 now holds the running sums within this vector.
mova m2, m1
LSHIFT m1, 8
paddb m1, m2
; Add the carry-in and store the finished pixels.
paddb m0, m1
movu [dstq+wq], m0
add wq, mmsize
jl .loop
; Hand the last in-range output pixel back to the caller through *left.
movd m0, [dstq-4]
movd [leftq], m0
REP_RET
;------------------------------------------------------------------------------
; void add_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *top, const uint16_t *diff, int mask, int w, int *left, int *left_top)
;
; Median prediction on 16-bit samples (the code doubles w to get a byte count
; and uses word-sized SIMD operations throughout). For each sample:
;   pred   = median(l, t, (t - tl + l) & mask)
;   dst[i] = (pred + diff[i]) & mask
; where t = top[i], tl = top[i-1] (*left_top for i == 0) and l = dst[i-1]
; (*left for i == 0). On exit *left receives the last dst sample and *left_top
; the last top sample, both zero-extended.
;
; Register roles (word 0 is the sample currently being processed):
;   mm0 = t - tl    mm1 = t    mm2 = residuals (diff)    mm3 = l / result
;   mm4, mm5 = temporaries     mm6 = mask in all words   mm7 = packed outputs
;
; NOTE(review): four samples are produced per iteration and the loop body runs
; at least once, so callers presumably pass w > 0 and a multiple of 4 -- confirm.
; NOTE(review): *left_top is loaded as 32 bits and OR-ed next to top[0]; this
; assumes its upper 16 bits are zero -- confirm.
; NOTE(review): pminsw/pmaxsw compare as SIGNED words; presumably all samples
; stay below 0x8000 so that this equals the numeric median -- confirm.
; NOTE(review): no emms is issued here; presumably the caller clears the MMX
; state -- confirm.
;------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
; w: sample count -> byte count (2 bytes per sample).
add wd, wd
; mm6 = mask replicated into all four words (SPLATW comes from x86util.asm).
movd mm6, maskd
SPLATW mm6, mm6
; First group of four: build tl = { *left_top, top[0], top[1], top[2] } by
; shifting the top vector up one word and OR-ing *left_top into word 0.
movq mm0, [topq]
movq mm2, mm0
movd mm4, [left_topq]
psllq mm2, 16
movq mm1, mm0
por mm4, mm2
; mm3 word 0 = initial left sample.
movd mm3, [leftq]
psubw mm0, mm4 ; t-tl
; Point all three rows at their ends and index with a negative byte offset
; that counts up to zero.
add dstq, wq
add topq, wq
add diffq, wq
neg wq
; t and t-tl for the first group are already set up; skip the per-iteration
; reload.
jmp .skip
.loop:
; Load the next four top samples and rebuild tl. After the three word shifts
; of the previous iteration, word 0 of mm1 holds the previous group's last top
; sample, which is this group's first top-left.
movq mm4, [topq+wq]
movq mm0, mm4
psllq mm4, 16
por mm4, mm1
movq mm1, mm0 ; t
psubw mm0, mm4 ; t-tl
.skip:
; mm2 = four residuals for this group.
movq mm2, [diffq+wq]
; The four samples are computed one after another because each needs the
; previous result as its left neighbour. Only word 0 of each register is
; meaningful in a step.
%assign i 0
%rep 4
movq mm4, mm0
paddw mm4, mm3 ; t-tl+l
pand mm4, mm6
; median(l, t, g) = max(min(max(l, t), g), min(l, t)), with g = masked t-tl+l.
movq mm5, mm3
pmaxsw mm3, mm1
pminsw mm5, mm1
pminsw mm3, mm4
pmaxsw mm3, mm5 ; median
paddw mm3, mm2 ; +residual
pand mm3, mm6
; Collect results in mm7: each new sample enters at the top word and earlier
; ones are shifted down, so after four steps mm7 holds them in memory order.
%if i==0
movq mm7, mm3
psllq mm7, 48
%else
movq mm4, mm3
psrlq mm7, 16
psllq mm4, 48
por mm7, mm4
%endif
; Bring the next sample's t-tl, t and residual down into word 0 (not needed
; after the last step).
%if i<3
psrlq mm0, 16
psrlq mm1, 16
psrlq mm2, 16
%endif
%assign i i+1
%endrep
; Store the four finished samples and advance by 8 bytes.
movq [dstq+wq], mm7
add wq, 8
jl .loop
; Write back the state needed to continue the prediction: last output sample
; and last top sample, zero-extended to 32 bits.
; r2 is the register that carried the diff argument; the diff pointer is no
; longer needed at this point, so it is reused as scratch.
movzx r2d, word [dstq-2]
mov [leftq], r2d
movzx r2d, word [topq-2]
mov [left_topq], r2d
RET
|