aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/vulkan/ffv1_enc_common.comp
blob: 62c0624b0e1f03037daa27c991a4787dc0605c03 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 * FFv1 codec
 *
 * Copyright (c) 2024 Lynne <dev@lynne.ee>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Loads the already-visited neighbours of one sample and returns the pair
 * (context, folded prediction residual) for it.
 *
 *   pos  - texel coordinate of the current sample inside src[p]
 *   off  - offset used for the border tests below (presumably the sample's
 *          position relative to the slice origin - confirm against caller)
 *   p    - index into the src[] image array
 *   comp - component of the loaded texel to use
 *   sw   - width used to clamp the top-right neighbour (presumably the slice
 *          width - confirm against caller)
 *   bits - forwarded unchanged to fold()
 *
 * Neighbours that the border tests exclude keep the value zero.
 */
ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
{
    /* In column 0 (resp. column 1) the neighbour one (resp. two) steps to the
     * left is shifted by (1, -1), i.e. it is fetched from the row above. */
    const ivec2 wrap_col0 = (off.x == 0) ? ivec2(1, -1) : ivec2(0, 0);
    const ivec2 wrap_col1 = (off.x == 1) ? ivec2(1, -1) : ivec2(0, 0);

    const bool has_row_above = off.y > 0;

    /* Sample two rows up; only fetched from the third row onwards. */
    TYPE above2 = TYPE(0);
    if (off.y > 1)
        above2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);

    /* Row above: [0] = top-left, [1] = top, [2] = top-right. */
    VTYPE3 above = VTYPE3(TYPE(0), TYPE(0), TYPE(0));
    if (has_row_above) {
        /* At off == (0, 1) the wrapped top-left would land two rows up,
         * so it is skipped and stays zero. */
        if (off != ivec2(0, 1))
            above[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + wrap_col0)[comp]);

        above[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);

        /* The horizontal step shrinks to 0 in the last column (off.x == sw - 1). */
        const int right_step = min(1, sw - off.x - 1);
        above[2] = TYPE(imageLoad(src[p], pos + ivec2(right_step, -1))[comp]);
    }

    /* Current row: [0] = two to the left, [1] = left, [2] = current sample. */
    VTYPE3 row = VTYPE3(TYPE(0), TYPE(0), imageLoad(src[p], pos)[comp]);
    if (off.x > 0 && off != ivec2(1, 0))
        row[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + wrap_col1)[comp]);
    if (off != ivec2(0, 0))
        row[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + wrap_col0)[comp]);

    /* x = context, y = current sample minus its prediction */
    ivec2 result = ivec2(get_context(VTYPE2(row), above, above2, context_model),
                         row[2] - predict(row[1], VTYPE2(above)));

    /* A negative context negates both the context and the residual. */
    if (result.x < 0)
        result = -result;

    result.y = fold(result.y, bits);

    return result;
}

/*
 * Terminates the coded data of one slice, appends the slice footer and
 * publishes the final size and position of the slice.
 *
 *   sc        - slice state; supplies the coder (sc.c / sc.pb), the header
 *               length (GOLOMB builds) and the start of the slice bytestream
 *   slice_idx - selects the pair of slice_results[] entries to fill in
 *
 * Footer layout as written below: 3 bytes of slice length, then - only when
 * ec is non-zero - one zero byte followed by 4 CRC bytes.
 */
void finalize_slice(inout SliceContext sc, const uint slice_idx)
{
    /* Number of bytes produced so far; the footer is appended right after. */
    uint32_t slice_len;
#ifdef GOLOMB
    slice_len = sc.hdr_len + flush_put_bits(sc.pb);
#else
    slice_len = rac_terminate(sc.c);
#endif

    u8buf out_bytes = u8buf(sc.c.bytestream_start);

    /* Slice length, stored as three bytes in z, y, x order - i.e. most
     * significant first if unpack8() returns the low byte in .x (per
     * GL_EXT_shader_explicit_arithmetic_types - confirm). */
    const u8vec4 len_bytes = unpack8(slice_len);
    out_bytes[slice_len + 0].v = len_bytes.z;
    out_bytes[slice_len + 1].v = len_bytes.y;
    out_bytes[slice_len + 2].v = len_bytes.x;
    slice_len += 3;

    if (ec != 0) {
        /* A single zero byte sits between the length and the CRC. */
        out_bytes[slice_len].v = uint8_t(0);
        slice_len++;

        /* Table-driven CRC over everything written so far, footer bytes
         * included, starting from crcref. */
        uint32_t crc = crcref;
        for (int n = 0; n < slice_len; n++) {
            const uint32_t tbl_idx = (crc & 0xFF) ^ uint32_t(out_bytes[n].v);
            crc = crc_ieee[tbl_idx] ^ (crc >> 8);
        }

        /* A non-zero starting value gets an extra fixed XOR on the result. */
        if (crcref != 0x00000000)
            crc ^= 0x8CD88196;

        /* CRC bytes are stored in x, y, z, w order. */
        const u8vec4 crc_bytes = unpack8(crc);
        out_bytes[slice_len + 0].v = crc_bytes.x;
        out_bytes[slice_len + 1].v = crc_bytes.y;
        out_bytes[slice_len + 2].v = crc_bytes.z;
        out_bytes[slice_len + 3].v = crc_bytes.w;
        slice_len += 4;
    }

    /* Entry 0: total slice size including the footer.
     * Entry 1: offset of the slice's first byte relative to out_data. */
    slice_results[slice_idx*2 + 0] = slice_len;
    slice_results[slice_idx*2 + 1] = uint64_t(out_bytes) - uint64_t(out_data);
}