/* path: libavcodec/riscv/h264dsp_init.c */
/*
 * Copyright © 2024 Rémi Denis-Courmont.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"

#include <stdint.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/riscv/cpu.h"
#include "libavcodec/h264dsp.h"

/*
 * Table of 8-bit weighted prediction routines, defined outside this file.
 * Each entry pairs a weight function with its biweight counterpart.
 * ff_h264dsp_init_riscv() reads entries 0 to 3 and stores them at the same
 * index in weight_h264_pixels_tab[] and biweight_h264_pixels_tab[].
 * NOTE(review): the array size is not visible here; confirm that the
 * definition provides at least four entries.
 */
extern const struct {
    const h264_weight_func weight;
    const h264_biweight_func biweight;
} ff_h264_weight_funcs_8_rvv[];

/*
 * 8-bit luma loop filter routines, defined outside this file
 * (NOTE(review): presumably RVV assembly; confirm).
 * ff_h264dsp_init_riscv() installs them as h264_v_loop_filter_luma,
 * h264_h_loop_filter_luma and h264_h_loop_filter_luma_mbaff when
 * bit_depth == 8 and ff_rv_vlen_least(128) is true.
 */
void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                      int alpha, int beta, int8_t *tc0);
void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                      int alpha, int beta, int8_t *tc0);
void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                            int alpha, int beta, int8_t *tc0);

/*
 * 8-bit IDCT routines, defined outside this file.
 * ff_h264dsp_init_riscv() installs the two single-block variants
 * (h264_idct_add, h264_idct8_add) when bit_depth == 8 and
 * ff_rv_vlen_least(128) is true.
 */
void ff_h264_idct_add_8_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_8_rvv(uint8_t *dst, int16_t *block, int stride);
/*
 * Multi-block variants taking a block offset table and an nnzc[5 * 8] array.
 * ff_h264dsp_init_riscv() installs these only when __riscv_xlen == 64, in
 * addition to the conditions above.
 */
void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset,
                              int16_t *block, int stride,
                              const uint8_t nnzc[5 * 8]);
void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset,
                                   int16_t *block, int stride,
                                   const uint8_t nnzc[5 * 8]);
void ff_h264_idct8_add4_8_rvv(uint8_t *dst, const int *blockoffset,
                              int16_t *block, int stride,
                              const uint8_t nnzc[5 * 8]);

/*
 * h264_idct_add implementations for bit depths 9, 10, 12 and 14, defined
 * outside this file. ff_h264dsp_init_riscv() installs the one matching
 * bit_depth when ff_rv_vlen_least(128) is true.
 * NOTE(review): dst is declared uint8_t * like the 8-bit variant; the actual
 * sample layout for these depths is not visible here.
 */
void ff_h264_idct_add_9_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add_10_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add_12_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add_14_rvv(uint8_t *dst, int16_t *block, int stride);

/*
 * Start code candidate search routines, defined outside this file.
 * ff_h264dsp_init_riscv() installs the _rvb variant when
 * AV_CPU_FLAG_RVB_BASIC is set, and then the _rvv variant (overriding _rvb)
 * when AV_CPU_FLAG_RVV_I32 is set in a HAVE_RVV build.
 * NOTE(review): parameters are unnamed; presumably (buffer, size); confirm
 * against the startcode_find_candidate member of H264DSPContext.
 */
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);

/**
 * Install the RISC-V specific H.264 DSP routines into @p dsp.
 *
 * Nothing is changed unless the build has HAVE_RV set. Selection then depends
 * on the flags returned by av_get_cpu_flags():
 *  - AV_CPU_FLAG_RVB_BASIC selects the RVB start code search;
 *  - AV_CPU_FLAG_RVV_I32 (HAVE_RVV builds only) selects the RVV start code
 *    search instead and, if ff_rv_vlen_least(128) holds, the RVV routines
 *    matching @p bit_depth.
 *
 * @param dsp               context whose function pointers get overridden
 * @param bit_depth         sample bit depth; 8, 9, 10, 12 and 14 have RVV
 *                          routines here, any other value leaves the IDCT,
 *                          loop filter and weighting pointers untouched
 * @param chroma_format_idc not used by this function
 */
av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
                                   const int chroma_format_idc)
{
#if HAVE_RV
    const int cpu_flags = av_get_cpu_flags();

    /* May be replaced by the RVV variant further down. */
    if (cpu_flags & AV_CPU_FLAG_RVB_BASIC)
        dsp->startcode_find_candidate = ff_startcode_find_candidate_rvb;
# if HAVE_RVV
    /* Everything below needs the vector extension. */
    if (!(cpu_flags & AV_CPU_FLAG_RVV_I32))
        return;

    /* All bit-depth specific routines share the same vector length check. */
    if (ff_rv_vlen_least(128)) {
        switch (bit_depth) {
        case 8:
            for (int n = 0; n < 4; n++) {
                dsp->weight_h264_pixels_tab[n] =
                    ff_h264_weight_funcs_8_rvv[n].weight;
                dsp->biweight_h264_pixels_tab[n] =
                    ff_h264_weight_funcs_8_rvv[n].biweight;
            }

            dsp->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv;
            dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
            dsp->h264_h_loop_filter_luma_mbaff =
                ff_h264_h_loop_filter_luma_mbaff_8_rvv;

            dsp->h264_idct_add = ff_h264_idct_add_8_rvv;
            dsp->h264_idct8_add = ff_h264_idct8_add_8_rvv;
#  if __riscv_xlen == 64
            /* The multi-block IDCT variants are only used on 64-bit targets. */
            dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
            dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
            dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
#  endif
            break;
        case 9:
            dsp->h264_idct_add = ff_h264_idct_add_9_rvv;
            break;
        case 10:
            dsp->h264_idct_add = ff_h264_idct_add_10_rvv;
            break;
        case 12:
            dsp->h264_idct_add = ff_h264_idct_add_12_rvv;
            break;
        case 14:
            dsp->h264_idct_add = ff_h264_idct_add_14_rvv;
            break;
        default:
            break;
        }
    }

    /* Installed regardless of bit depth and vector length. */
    dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
# endif
#endif
}