; path: root/libavcodec/x86/utvideodsp.asm
; blob: e44c1ea4719de74176b515aa2ae828bfd2065f26
;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Vector constants, 16 bytes each, loaded whole into SIMD registers below.
;   pb_128  - 128 in every byte; subtracted from the green plane in
;             restore_rgb_planes.
;   pw_512  - 512 in every 16-bit word; subtracted from the green plane in
;             restore_rgb_planes10.
;   pw_1023 - 1023 (0x3FF) in every 16-bit word; AND mask applied to the red
;             and blue results in restore_rgb_planes10 (keeps the low 10 bits).
pb_128:  times 16 db 128
pw_512:  times 8  dw 512
pw_1023: times 8  dw 1023

SECTION .text

; x86inc setup for the functions that follow: XMM registers, sse2 variant.
INIT_XMM sse2

; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; In place, for every byte of every row:
;     r = r + g - 128
;     b = b + g - 128
; using wrapping byte arithmetic (psubb/paddb). The green plane is only read.
;
; Each row is processed mmsize bytes at a time with a negative index x that
; counts up from -width towards zero, so:
;   - whole vectors are read and written; when width is not a multiple of
;     mmsize the last vector of a row extends past width bytes;
;   - both loops are do-while shaped: the inner body runs at least once per
;     row and at least one row is processed.
; NOTE(review): mova is the aligned move, so each plane's row start (base
; pointer and linesize) is presumably mmsize-aligned and padded to a multiple
; of mmsize -- confirm against the caller.
;
; cglobal numeric arguments (x86inc convention): arguments loaded into
; registers (8 on x86-64, 7 on x86-32), general-purpose registers used
; (9 on x86-64, 7 on x86-32), SIMD registers used (m0-m3).
cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    ; Widen the int width to a full-size register so it can be used in
    ; pointer arithmetic.
    movsxdifnidn wq, wd
    ; Point each plane at the end of its first row ...
    add      src_rq, wq
    add      src_gq, wq
    add      src_bq, wq
    ; ... and keep -width as the starting value of the row index.
    neg          wq
%if ARCH_X86_64 == 0
    ; x86-32 has only 7 registers here: spill -width back to its stack slot,
    ; then rename the register that held w to x. From this point wq and hd
    ; refer to the stack arguments (argument 6 and argument 7).
    mov          wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
    mova         m3, [pb_128]               ; m3 = 128 in every byte
.nextrow:
    mov          xq, wq                     ; x = -width

    .loop:
        mova           m0, [src_rq + xq]    ; m0 = r
        mova           m1, [src_gq + xq]    ; m1 = g
        mova           m2, [src_bq + xq]    ; m2 = b
        psubb          m1, m3               ; m1 = g - 128
        paddb          m0, m1               ; r += g - 128
        paddb          m2, m1               ; b += g - 128
        mova  [src_rq+xq], m0
        mova  [src_bq+xq], m2
        add            xq, mmsize
    ; The flags come from the add above: keep looping while x is still negative.
    jl .loop

    ; Step every plane to the end of the next row.
    add        src_rq, linesize_rq
    add        src_gq, linesize_gq
    add        src_bq, linesize_bq
    ; The height argument itself is the row counter; continue while it stays > 0.
    sub        hd, 1
    jg .nextrow
    REP_RET

; restore_rgb_planes10(src_r, src_g, src_b,
;                      linesize_r, linesize_g, linesize_b, width, height)
; NOTE(review): the C prototype is not visible in this file; presumably the
; same as restore_rgb_planes but with 16-bit sample pointers -- confirm
; against the C declaration.
;
; 16-bit-word variant of restore_rgb_planes. In place, for every word of
; every row:
;     r = (r + g - 512) & 1023
;     b = (b + g - 512) & 1023
; The green plane is only read.
;
; width and all three linesizes are doubled on entry before being used as
; byte offsets, so they are presumably passed in units of 16-bit samples --
; confirm against the caller.
;
; Each row is processed mmsize bytes at a time with a negative byte index x
; that counts up from -(2 * width) towards zero; whole vectors are read and
; written, and both loops run their body at least once.
; NOTE(review): mova is the aligned move, so each plane's row start is
; presumably mmsize-aligned and padded to a multiple of mmsize -- confirm
; against the caller.
;
; cglobal numeric arguments (x86inc convention): arguments loaded into
; registers (8 on x86-64, 7 on x86-32), general-purpose registers used
; (9 on x86-64, 7 on x86-32), SIMD registers used (m0-m4).
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    ; Width in bytes. The 32-bit shift also clears the upper half of wq on
    ; x86-64, which makes wq usable in the pointer additions below.
    shl          wd, 1
    ; Double the linesizes as well.
    shl linesize_rq, 1
    shl linesize_gq, 1
    shl linesize_bq, 1
    ; Point each plane at the end of its first row.
    add      src_rq, wq
    add      src_gq, wq
    add      src_bq, wq
    mova         m3, [pw_512]               ; m3 = 512 in every word
    mova         m4, [pw_1023]              ; m4 = 0x3FF mask in every word
    ; Keep -(2 * width) as the starting value of the row index.
    neg          wq
%if ARCH_X86_64 == 0
    ; x86-32 has only 7 registers here: spill the negated byte width back to
    ; its stack slot, then rename the register that held w to x. From this
    ; point wq and hd refer to the stack arguments (argument 6 and argument 7).
    mov          wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
.nextrow:
    mov          xq, wq                     ; x = -(2 * width)

    .loop:
        mova           m0, [src_rq + xq]    ; m0 = r
        mova           m1, [src_gq + xq]    ; m1 = g
        mova           m2, [src_bq + xq]    ; m2 = b
        psubw          m1, m3               ; m1 = g - 512
        paddw          m0, m1               ; r += g - 512
        paddw          m2, m1               ; b += g - 512
        pand           m0, m4               ; keep the low 10 bits of r
        pand           m2, m4               ; keep the low 10 bits of b
        mova  [src_rq+xq], m0
        mova  [src_bq+xq], m2
        add            xq, mmsize
    ; The flags come from the add above: keep looping while x is still negative.
    jl .loop

    ; Step every plane to the end of the next row (linesizes were doubled above).
    add        src_rq, linesize_rq
    add        src_gq, linesize_gq
    add        src_bq, linesize_bq
    ; The height argument itself is the row counter; continue while it stays > 0.
    sub        hd, 1
    jg .nextrow
    REP_RET