aboutsummaryrefslogtreecommitdiffstats
path: root/libswscale/loongarch/rgb2rgb_lasx.c
blob: 1b6be9021768e6652e92fee7db78f422c81d5794 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/*
 * Copyright (c) 2022 Loongson Technology Corporation Limited
 * Contributed by Hao Chen(chenhao@loongson.cn)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "swscale_loongarch.h"
#include "libavutil/loongarch/loongson_intrinsics.h"

void ff_interleave_bytes_lasx(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, int width, int height,
                              int src1Stride, int src2Stride, int dstStride)
{
    int h;
    int len = width & (0xFFFFFFF0);

    for (h = 0; h < height; h++) {
        int w, index = 0;
        __m256i src_1, src_2, dst;

        for (w = 0; w < len; w += 16) {
            DUP2_ARG2(__lasx_xvld, src1 + w, 0, src2 + w, 0, src_1, src_2);
            src_1 = __lasx_xvpermi_d(src_1, 0xD8);
            src_2 = __lasx_xvpermi_d(src_2, 0xD8);
            dst   = __lasx_xvilvl_b(src_2, src_1);
            __lasx_xvst(dst, dest + index, 0);
            index  += 32;
        }
        for (; w < width; w++) {
            dest[(w << 1) + 0] = src1[w];
            dest[(w << 1) + 1] = src2[w];
        }
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
}