aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLynne <dev@lynne.ee>2024-10-06 06:27:16 +0200
committerLynne <dev@lynne.ee>2024-11-18 07:54:22 +0100
commited2391d3410e253bf37e5e7274cc945c866bd337 (patch)
tree6c71f21ec32cab76f25bd65a95001b38056f1fd8
parenta6c58353ac033798fb799cd761e6a78b4fb12d60 (diff)
downloadffmpeg-ed2391d3410e253bf37e5e7274cc945c866bd337.tar.gz
ffv1enc: add a Vulkan encoder
This commit implements a standard, compliant, version 3 and version 4 FFv1 encoder, entirely in Vulkan. The encoder is written in standard GLSL and requires a Vulkan 1.3 supporting GPU with the BDA extension. The encoder can use any number of slices, but nominally, should use 32x32 slices (1024 in total) to maximize parallelism. All features are supported, as well as all pixel formats. This includes: Rice, range coding with a custom quantization table, and PCM encoding. CRC calculation is also massively parallelized on the GPU. Encoding of unaligned dimensions on subsampled data requires version 4, or requires oversizing the image to 64-pixel alignment and cropping out the padding via container flags. Performance-wise, this makes 1080p real-time screen capture possible at 60fps on even modest GPUs.
-rwxr-xr-xconfigure1
-rw-r--r--libavcodec/Makefile1
-rw-r--r--libavcodec/allcodecs.c1
-rw-r--r--libavcodec/ffv1enc_vulkan.c1604
-rw-r--r--libavcodec/vulkan/Makefile8
-rw-r--r--libavcodec/vulkan/common.comp170
-rw-r--r--libavcodec/vulkan/ffv1_common.comp74
-rw-r--r--libavcodec/vulkan/ffv1_enc.comp67
-rw-r--r--libavcodec/vulkan/ffv1_enc_ac.comp83
-rw-r--r--libavcodec/vulkan/ffv1_enc_common.comp101
-rw-r--r--libavcodec/vulkan/ffv1_enc_rct.comp82
-rw-r--r--libavcodec/vulkan/ffv1_enc_rgb.comp83
-rw-r--r--libavcodec/vulkan/ffv1_enc_setup.comp151
-rw-r--r--libavcodec/vulkan/ffv1_enc_vlc.comp112
-rw-r--r--libavcodec/vulkan/ffv1_reset.comp55
-rw-r--r--libavcodec/vulkan/ffv1_vlc.comp122
-rw-r--r--libavcodec/vulkan/rangecoder.comp190
17 files changed, 2905 insertions, 0 deletions
diff --git a/configure b/configure
index 3a614c76c1..591aa53753 100755
--- a/configure
+++ b/configure
@@ -2951,6 +2951,7 @@ exr_decoder_deps="zlib"
exr_encoder_deps="zlib"
ffv1_decoder_select="rangecoder"
ffv1_encoder_select="rangecoder"
+ffv1_vulkan_encoder_select="vulkan spirv_compiler"
ffvhuff_decoder_select="huffyuv_decoder"
ffvhuff_encoder_select="huffyuv_encoder"
fic_decoder_select="golomb"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 676ff542af..a6e0e0b55e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -370,6 +370,7 @@ OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o
OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o
OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o
OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o
+OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1enc_vulkan.o
OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o
OBJS-$(CONFIG_FIC_DECODER) += fic.o
OBJS-$(CONFIG_FITS_DECODER) += fitsdec.o fits.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index d8a5866435..0b559dfc58 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -116,6 +116,7 @@ extern const FFCodec ff_escape130_decoder;
extern const FFCodec ff_exr_encoder;
extern const FFCodec ff_exr_decoder;
extern const FFCodec ff_ffv1_encoder;
+extern const FFCodec ff_ffv1_vulkan_encoder;
extern const FFCodec ff_ffv1_decoder;
extern const FFCodec ff_ffvhuff_encoder;
extern const FFCodec ff_ffvhuff_decoder;
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
new file mode 100644
index 0000000000..2f776307c1
--- /dev/null
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -0,0 +1,1604 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/crc.h"
+#include "libavutil/vulkan.h"
+#include "libavutil/vulkan_spirv.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "hwconfig.h"
+#include "encode.h"
+#include "libavutil/opt.h"
+#include "codec_internal.h"
+
+#include "ffv1.h"
+#include "ffv1enc.h"
+
+/* Parallel Golomb alignment */
+#define LG_ALIGN_W 32
+#define LG_ALIGN_H 32
+
+/*
+ * Private context of the Vulkan FFv1 encoder (used as avctx->priv_data):
+ * the generic FFv1 codec state plus the Vulkan context, the compute shaders
+ * and the buffer pools used while encoding a frame.
+ */
+typedef struct VulkanEncodeFFv1Context {
+ /* Generic FFv1 codec state */
+ FFV1Context ctx;
+
+ /* Vulkan context, queue family and execution pool used for all shaders */
+ FFVulkanContext s;
+ FFVkQueueFamilyCtx qf;
+ FFVkExecPool exec_pool;
+
+ /* Compute shaders, dispatched in this order when encoding a frame
+ * (reset and rct are conditional) */
+ FFVulkanShader setup;
+ FFVulkanShader reset;
+ FFVulkanShader rct;
+ FFVulkanShader enc;
+
+ /* Constant read-only buffers */
+ FFVkBuffer quant_buf;
+ FFVkBuffer rangecoder_static_buf;
+ FFVkBuffer crc_tab_buf;
+
+ /* Slice data buffer pool */
+ AVBufferPool *slice_data_pool;
+ /* Slice data kept from the last keyframe; only set when gop_size > 1 */
+ AVBufferRef *keyframe_slice_data_ref;
+
+ /* Output data buffer */
+ AVBufferPool *out_data_pool;
+
+ /* Temporary data buffer */
+ AVBufferPool *tmp_data_pool;
+
+ /* Slice results buffer */
+ AVBufferPool *results_data_pool;
+
+ /* Intermediate frame pool */
+ AVBufferRef *intermediate_frames_ref;
+
+ /* Representation mode */
+ enum FFVkShaderRepFormat rep_fmt;
+
+ int num_h_slices;
+ int num_v_slices;
+ /* Forwarded to the shaders as the force_pcm push constant */
+ int force_pcm;
+
+ /* Non-zero: the RCT shader is run into an intermediate frame first */
+ int is_rgb;
+ /* Forwarded to the shaders as the ppi and chunks push constants */
+ int ppi;
+ int chunks;
+} VulkanEncodeFFv1Context;
+
+/* GLSL source strings, defined outside this file; they are appended to the
+ * shader sources with GLSLD(). */
+extern const char *ff_source_common_comp;
+extern const char *ff_source_rangecoder_comp;
+extern const char *ff_source_ffv1_vlc_comp;
+extern const char *ff_source_ffv1_common_comp;
+extern const char *ff_source_ffv1_reset_comp;
+extern const char *ff_source_ffv1_enc_common_comp;
+extern const char *ff_source_ffv1_enc_rct_comp;
+extern const char *ff_source_ffv1_enc_vlc_comp;
+extern const char *ff_source_ffv1_enc_ac_comp;
+extern const char *ff_source_ffv1_enc_setup_comp;
+extern const char *ff_source_ffv1_enc_comp;
+extern const char *ff_source_ffv1_enc_rgb_comp;
+
+/* Push constants of the RCT shader; filled in by run_rct(). */
+typedef struct FFv1VkRCTParameters {
+ /* Set to 1 << bits_per_raw_sample */
+ int offset;
+ /* Non-zero when the input is planar RGB stored in more than one image */
+ uint8_t planar_rgb;
+ uint8_t transparency;
+ uint8_t padding[2];
+} FFv1VkRCTParameters;
+
+/* Push constants of the reset shader. The field order mirrors the GLSL
+ * push_constant block emitted in init_reset_shader(). */
+typedef struct FFv1VkResetParameters {
+ /* Device address of the per-slice state, past the SliceContext array */
+ VkDeviceAddress slice_state;
+ uint32_t plane_state_size;
+ uint32_t context_count;
+ uint8_t codec_planes;
+ uint8_t key_frame;
+ uint8_t padding[3];
+} FFv1VkResetParameters;
+
+/* Push constants shared by the setup and encode shaders. The field order
+ * mirrors the GLSL push_constant block emitted by add_push_data(). */
+typedef struct FFv1VkParameters {
+ /* Buffer device addresses */
+ VkDeviceAddress slice_state;
+ VkDeviceAddress scratch_data;
+ VkDeviceAddress out_data;
+
+ /* Sample aspect ratio as { num, den } */
+ int32_t sar[2];
+ /* { chroma_h_shift, chroma_v_shift } */
+ uint32_t chroma_shift[2];
+
+ /* Output buffer size divided by the slice count */
+ uint32_t slice_size_max;
+ uint32_t plane_state_size;
+ uint32_t context_count;
+ uint32_t crcref;
+
+ uint8_t bits_per_raw_sample;
+ uint8_t context_model;
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t force_pcm;
+ uint8_t key_frame;
+ /* Plane count of the software pixel format */
+ uint8_t planes;
+ /* Plane count of the codec (FFV1Context.plane_count) */
+ uint8_t codec_planes;
+ uint8_t transparency;
+ uint8_t colorspace;
+ /* 3 = not interlaced, 1 = interlaced top field first, 2 = interlaced
+ * bottom field first */
+ uint8_t pic_mode;
+ uint8_t ec;
+ uint8_t ppi;
+ uint8_t chunks;
+ uint8_t padding[2];
+} FFv1VkParameters;
+
+/*
+ * Append the GLSL push_constant block matching FFv1VkParameters to the
+ * source of shd, and register a compute-stage push constant range of
+ * sizeof(FFv1VkParameters) bytes at offset 0.
+ *
+ * The member order below must be kept in sync with FFv1VkParameters.
+ */
+static void add_push_data(FFVulkanShader *shd)
+{
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, u8buf slice_state; );
+ GLSLC(1, u8buf scratch_data; );
+ GLSLC(1, u8buf out_data; );
+ GLSLC(0, );
+ GLSLC(1, ivec2 sar; );
+ GLSLC(1, uvec2 chroma_shift; );
+ GLSLC(0, );
+ GLSLC(1, uint slice_size_max; );
+ GLSLC(1, uint plane_state_size; );
+ GLSLC(1, uint context_count; );
+ GLSLC(1, uint32_t crcref; );
+ GLSLC(0, );
+ GLSLC(1, uint8_t bits_per_raw_sample; );
+ GLSLC(1, uint8_t context_model; );
+ GLSLC(1, uint8_t version; );
+ GLSLC(1, uint8_t micro_version; );
+ GLSLC(1, uint8_t force_pcm; );
+ GLSLC(1, uint8_t key_frame; );
+ GLSLC(1, uint8_t planes; );
+ GLSLC(1, uint8_t codec_planes; );
+ GLSLC(1, uint8_t transparency; );
+ GLSLC(1, uint8_t colorspace; );
+ GLSLC(1, uint8_t pic_mode; );
+ GLSLC(1, uint8_t ec; );
+ GLSLC(1, uint8_t ppi; );
+ GLSLC(1, uint8_t chunks; );
+ GLSLC(1, uint8_t padding[2]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+/**
+ * Record the RCT pass into exec.
+ *
+ * Allocates a frame from fv->intermediate_frames_ref, binds the slice data
+ * buffer, the input image and the intermediate image to the rct shader,
+ * submits the barriers accumulated so far in img_bar/buf_bar, and dispatches
+ * num_h_slices x num_v_slices workgroups. A barrier on the intermediate frame
+ * is then appended to img_bar; it is not submitted here, the caller does so.
+ *
+ * *intermediate_frame may be left allocated when an error is returned; the
+ * caller is expected to free it.
+ *
+ * @return a negative error code on failure, otherwise the non-negative
+ * result of the last checked call
+ */
+static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec,
+ AVFrame *enc_in, VkImageView *enc_in_views,
+ AVFrame **intermediate_frame, VkImageView *intermediate_views,
+ VkImageMemoryBarrier2 *img_bar, int *nb_img_bar,
+ VkBufferMemoryBarrier2 *buf_bar, int *nb_buf_bar,
+ FFVkBuffer *slice_data_buf, uint32_t slice_data_size)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ FFVulkanFunctions *vk = &fv->s.vkfn;
+ AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data;
+ FFv1VkRCTParameters pd;
+
+ /* Create a temporary frame */
+ *intermediate_frame = av_frame_alloc();
+ if (!(*intermediate_frame))
+ return AVERROR(ENOMEM);
+
+ RET(av_hwframe_get_buffer(fv->intermediate_frames_ref,
+ *intermediate_frame, 0));
+
+ RET(ff_vk_exec_add_dep_frame(&fv->s, exec, *intermediate_frame,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_create_imageviews(&fv->s, exec, intermediate_views,
+ *intermediate_frame,
+ fv->rep_fmt));
+
+ /* Update descriptors */
+ ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct,
+ 1, 0, 0,
+ slice_data_buf,
+ 0, slice_data_size*f->slice_count,
+ VK_FORMAT_UNDEFINED);
+ ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
+ enc_in, enc_in_views,
+ 1, 1,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+ ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
+ *intermediate_frame, intermediate_views,
+ 1, 2,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+
+ /* The shader writes to the intermediate frame */
+ ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ /* Prep the input/output images */
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = *nb_img_bar,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = *nb_buf_bar,
+ });
+ *nb_img_bar = 0;
+ /* Record the state the slice data buffer is in after the barrier */
+ if (*nb_buf_bar) {
+ slice_data_buf->stage = buf_bar[0].dstStageMask;
+ slice_data_buf->access = buf_bar[0].dstAccessMask;
+ *nb_buf_bar = 0;
+ }
+
+ /* Run the shader */
+ ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct);
+ pd = (FFv1VkRCTParameters) {
+ .offset = 1 << f->bits_per_raw_sample,
+ .planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) &&
+ (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1),
+ .transparency = f->transparency,
+ };
+ ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd), &pd);
+
+ vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+
+ /* Add a post-dispatch barrier before encoding */
+ ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_READ_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+fail:
+ return err;
+}
+
+/**
+ * Encode one frame on the GPU and assemble the resulting packet.
+ *
+ * Records the setup, (conditional) reset, (conditional) RCT and encode
+ * shaders into one execution context, submits it and waits for completion.
+ * Each slice is written by the GPU at its own offset of the output buffer;
+ * afterwards the slices are moved together so that the packet is contiguous.
+ *
+ * @param avctx      codec context; priv_data is a VulkanEncodeFFv1Context
+ * @param pkt        packet to fill; its data points into the mapped output
+ *                   buffer
+ * @param pict       input frame, or NULL, in which case nothing is produced
+ * @param got_packet set to 1 when a packet was written
+ * @return 0 on success (also when pict is NULL), a negative error code
+ *         otherwise
+ */
+static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
+                                    const AVFrame *pict, int *got_packet)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFV1Context *f = &fv->ctx;
+    FFVulkanFunctions *vk = &fv->s.vkfn;
+    FFVkExecContext *exec;
+
+    FFv1VkParameters pd;
+
+    AVFrame *intermediate_frame = NULL;
+
+    /* Temporary data */
+    size_t tmp_data_size;
+    AVBufferRef *tmp_data_ref = NULL;
+    FFVkBuffer *tmp_data_buf;
+
+    /* Slice data */
+    AVBufferRef *slice_data_ref = NULL;
+    int own_slice_data_ref = 0; /* 1 while this function must free it */
+    FFVkBuffer *slice_data_buf;
+    uint32_t plane_state_size;
+    uint32_t slice_state_size;
+    uint32_t slice_data_size;
+
+    /* Output data */
+    size_t maxsize;
+    AVBufferRef *out_data_ref = NULL;
+    FFVkBuffer *out_data_buf;
+    uint8_t *buf_p;
+
+    /* Results data */
+    AVBufferRef *results_data_ref = NULL;
+    FFVkBuffer *results_data_buf;
+    uint32_t *sc;
+
+    int has_inter = avctx->gop_size > 1;
+    uint32_t context_count = f->context_count[f->context_model];
+
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView intermediate_views[AV_NUM_DATA_POINTERS];
+
+    AVFrame *enc_in = (AVFrame *)pict;
+    VkImageView *enc_in_views = in_views;
+
+    VkMappedMemoryRange invalidate_data[2];
+    int nb_invalidate_data = 0;
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    VkBufferMemoryBarrier2 buf_bar[8];
+    int nb_buf_bar = 0;
+
+    if (!pict)
+        return 0;
+
+    exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
+    err = ff_vk_exec_start(&fv->s, exec);
+    if (err < 0)
+        return err;
+
+    /* Frame state */
+    f->cur_enc_frame = pict;
+    if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
+        av_buffer_unref(&fv->keyframe_slice_data_ref);
+        f->key_frame = 1;
+        f->gob_count++;
+    } else {
+        f->key_frame = 0;
+    }
+
+    f->max_slice_count = f->num_h_slices * f->num_v_slices;
+    f->slice_count = f->max_slice_count;
+
+    /* Allocate temporary data buffer */
+    tmp_data_size = f->slice_count*CONTEXT_SIZE;
+    err = ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
+                                  &tmp_data_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, tmp_data_size,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (err < 0) {
+        /* The output reference is not trusted after a failure */
+        tmp_data_ref = NULL;
+        goto fail;
+    }
+    tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
+
+    /* Allocate slice buffer data */
+    if (f->ac == AC_GOLOMB_RICE)
+        plane_state_size = 8;
+    else
+        plane_state_size = CONTEXT_SIZE;
+
+    plane_state_size *= context_count;
+    slice_state_size = plane_state_size*f->plane_count;
+
+    slice_data_size = 256; /* Overestimation for the SliceContext struct */
+    slice_state_size += slice_data_size;
+    slice_state_size = FFALIGN(slice_state_size, 8);
+
+    slice_data_ref = fv->keyframe_slice_data_ref;
+    if (!slice_data_ref) {
+        /* Allocate slice data buffer */
+        err = ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
+                                      &slice_data_ref,
+                                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                      VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                      NULL, slice_state_size*f->slice_count,
+                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+        if (err < 0) {
+            slice_data_ref = NULL;
+            goto fail;
+        }
+
+        /* Only save it if we're going to use it again; otherwise this
+         * function holds the only reference for now. */
+        if (has_inter)
+            fv->keyframe_slice_data_ref = slice_data_ref;
+        else
+            own_slice_data_ref = 1;
+    }
+    slice_data_buf = (FFVkBuffer *)slice_data_ref->data;
+
+    /* Allocate results buffer */
+    err = ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
+                                  &results_data_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, 2*f->slice_count*sizeof(uint32_t),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0) {
+        results_data_ref = NULL;
+        goto fail;
+    }
+    results_data_buf = (FFVkBuffer *)results_data_ref->data;
+
+    /* Output buffer size */
+    maxsize = avctx->width*avctx->height*(1 + f->transparency);
+    if (f->chroma_planes)
+        maxsize += AV_CEIL_RSHIFT(avctx->width, f->chroma_h_shift) *
+                   AV_CEIL_RSHIFT(f->height, f->chroma_v_shift)*2;
+    maxsize += f->slice_count * 800;
+    if (f->version > 3) {
+        maxsize *= f->bits_per_raw_sample + 1;
+    } else {
+        maxsize += f->slice_count * 2 * (avctx->width + avctx->height);
+        maxsize *= 8*(2*f->bits_per_raw_sample + 5);
+    }
+    maxsize >>= 3;
+    maxsize += FF_INPUT_BUFFER_MIN_SIZE;
+
+    if (maxsize > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE - 32) {
+        av_log(avctx, AV_LOG_WARNING, "Cannot allocate worst case packet size, "
+                                      "the encoding could fail\n");
+        maxsize = INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE - 32;
+    }
+
+    /* Allocate output buffer */
+    err = ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
+                                  &out_data_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, maxsize,
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+    if (err < 0)
+        goto fail;
+
+    /* From here on the packet holds the output buffer reference.
+     * NOTE(review): this relies on the caller unreferencing the packet
+     * when an error is returned - confirm against the encode framework. */
+    out_data_buf = (FFVkBuffer *)out_data_ref->data;
+    pkt->data = out_data_buf->mapped_mem;
+    pkt->size = out_data_buf->size;
+    pkt->buf = out_data_ref;
+
+    /* Add dependencies. A buffer added with ref == 0 is treated as handed
+     * over to the execution context, so the local reference is forgotten
+     * right after the call. On failure its ownership is unknown, and a
+     * possible leak is preferred over a possible double free. */
+    err = ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
+    tmp_data_ref = NULL;
+    if (err < 0)
+        goto fail;
+
+    err = ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 0);
+    results_data_ref = NULL;
+    if (err < 0)
+        goto fail;
+
+    err = ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
+    own_slice_data_ref = 0;
+    if (err < 0)
+        goto fail;
+
+    RET(ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 1));
+    RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_create_imageviews(&fv->s, exec, enc_in_views, enc_in,
+                                fv->rep_fmt));
+    ff_vk_frame_barrier(&fv->s, exec, enc_in, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    /* Setup shader needs the original input */
+    ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup,
+                                    1, 0, 0,
+                                    slice_data_buf,
+                                    0, slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup,
+                                  enc_in, enc_in_views,
+                                  1, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    /* Add a buffer barrier between previous and current frame */
+    if (!f->key_frame) {
+        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = slice_data_buf->stage,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = slice_data_buf->access,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = slice_data_buf->buf,
+            .size = VK_WHOLE_SIZE,
+            .offset = 0,
+        };
+    }
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    nb_img_bar = 0;
+    if (nb_buf_bar) {
+        slice_data_buf->stage = buf_bar[0].dstStageMask;
+        slice_data_buf->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Run setup shader */
+    ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup);
+    pd = (FFv1VkParameters) {
+        /* The per-slice state follows the array of SliceContext structs */
+        .slice_state = slice_data_buf->address + f->slice_count*slice_data_size,
+        .scratch_data = tmp_data_buf->address,
+        .out_data = out_data_buf->address,
+        .slice_size_max = out_data_buf->size / f->slice_count,
+        .bits_per_raw_sample = f->bits_per_raw_sample,
+        .sar[0] = pict->sample_aspect_ratio.num,
+        .sar[1] = pict->sample_aspect_ratio.den,
+        .chroma_shift[0] = f->chroma_h_shift,
+        .chroma_shift[1] = f->chroma_v_shift,
+        .plane_state_size = plane_state_size,
+        .context_count = context_count,
+        .crcref = f->crcref,
+        .context_model = fv->ctx.context_model,
+        .version = f->version,
+        .micro_version = f->micro_version,
+        .force_pcm = fv->force_pcm,
+        .key_frame = f->key_frame,
+        .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
+        .codec_planes = f->plane_count,
+        .transparency = f->transparency,
+        .colorspace = f->colorspace,
+        .pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 :
+                    !(pict->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 2 : 1,
+        .ec = f->ec,
+        .ppi = fv->ppi,
+        .chunks = fv->chunks,
+    };
+    ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+    vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+
+    /* Setup shader modified the slice data buffer */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_data_buf->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_data_buf->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_data_buf->buf,
+        .size = slice_data_size*f->slice_count,
+        .offset = 0,
+    };
+
+    if (f->key_frame || f->version > 3) {
+        FFv1VkResetParameters pd_reset;
+
+        ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->reset,
+                                        1, 0, 0,
+                                        slice_data_buf,
+                                        0, slice_data_size*f->slice_count,
+                                        VK_FORMAT_UNDEFINED);
+
+        /* Run reset shader */
+        ff_vk_exec_bind_shader(&fv->s, exec, &fv->reset);
+        pd_reset = (FFv1VkResetParameters) {
+            .slice_state = slice_data_buf->address + f->slice_count*slice_data_size,
+            .plane_state_size = plane_state_size,
+            .context_count = context_count,
+            .codec_planes = f->plane_count,
+            .key_frame = f->key_frame,
+        };
+        ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset,
+                                       VK_SHADER_STAGE_COMPUTE_BIT,
+                                       0, sizeof(pd_reset), &pd_reset);
+
+        /* Sync between setup and reset shaders */
+        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .pBufferMemoryBarriers = buf_bar,
+            .bufferMemoryBarrierCount = nb_buf_bar,
+        });
+        slice_data_buf->stage = buf_bar[0].dstStageMask;
+        slice_data_buf->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+
+        vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices,
+                        f->plane_count);
+    }
+
+    /* Run RCT shader */
+    if (fv->is_rgb) {
+        RET(run_rct(avctx, exec,
+                    enc_in, enc_in_views,
+                    &intermediate_frame, intermediate_views,
+                    img_bar, &nb_img_bar, buf_bar, &nb_buf_bar,
+                    slice_data_buf, slice_data_size));
+
+        /* Use the new frame */
+        enc_in = intermediate_frame;
+        enc_in_views = intermediate_views;
+    }
+
+    /* If the reset shader ran, insert a barrier now. */
+    if (f->key_frame || f->version > 3) {
+        /* Reset shader modified the slice data buffer */
+        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = slice_data_buf->stage,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = slice_data_buf->access,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = slice_data_buf->buf,
+            .size = slice_data_buf->size - slice_data_size*f->slice_count,
+            .offset = slice_data_size*f->slice_count,
+        };
+    }
+
+    /* Final barrier before encoding */
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    nb_img_bar = 0;
+    if (nb_buf_bar) {
+        slice_data_buf->stage = buf_bar[0].dstStageMask;
+        slice_data_buf->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Main encode shader */
+    ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->enc,
+                                    1, 0, 0,
+                                    slice_data_buf,
+                                    0, slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
+                                  enc_in, enc_in_views,
+                                  1, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+    ff_vk_shader_update_desc_buffer(&fv->s, exec,
+                                    &fv->enc, 1, 2, 0,
+                                    results_data_buf,
+                                    0, results_data_buf->size,
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc);
+    ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+    vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+
+    /* Submit */
+    RET(ff_vk_exec_submit(&fv->s, exec));
+
+    /* We need the encoded data immediately */
+    ff_vk_exec_wait(&fv->s, exec);
+
+    /* Frames added as a dep are always referenced, so we only need to
+     * clean this up. */
+    av_frame_free(&intermediate_frame);
+
+    /* Invalidate slice/output data if needed */
+    if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = results_data_buf->mem,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = out_data_buf->mem,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    if (nb_invalidate_data)
+        vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
+                                         nb_invalidate_data, invalidate_data);
+
+    /* Each slice has two results: [0] = size in bytes, [1] = source offset
+     * into the output buffer. First slice is in-place. */
+    buf_p = pkt->data;
+    sc = &((uint32_t *)results_data_buf->mapped_mem)[0];
+    av_log(avctx, AV_LOG_VERBOSE, "Slice size = %u (max %i), src offset = %u\n",
+           sc[0], pkt->size / f->slice_count, sc[1]);
+    av_assert0(sc[0] < pkt->size / f->slice_count);
+    av_assert0(sc[0] < (1 << 24));
+    buf_p += sc[0];
+
+    /* We have to copy the rest */
+    for (int i = 1; i < f->slice_count; i++) {
+        uint32_t bytes;
+        uint8_t *bs_start;
+
+        sc = &((uint32_t *)results_data_buf->mapped_mem)[i*2];
+        bytes = sc[0];
+        bs_start = pkt->data + sc[1];
+
+        av_log(avctx, AV_LOG_VERBOSE, "Slice size = %u (max %i), src offset = %u\n",
+               bytes, pkt->size / f->slice_count, sc[1]);
+        av_assert0(bytes < pkt->size / f->slice_count);
+        av_assert0(bytes < (1 << 24));
+
+        /* Source and destination share one buffer, hence memmove */
+        memmove(buf_p, bs_start, bytes);
+
+        buf_p += bytes;
+    }
+
+    f->picture_number++;
+    pkt->size = buf_p - pkt->data;
+    pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
+    *got_packet = 1;
+
+    av_log(avctx, AV_LOG_VERBOSE, "Total data = %i\n",
+           pkt->size);
+
+    return 0;
+
+fail:
+    /* Frames added as a dep are always referenced, so the local frame is
+     * always ours to free. */
+    av_frame_free(&intermediate_frame);
+
+    /* Buffers that were never handed over to the execution context */
+    av_buffer_unref(&tmp_data_ref);
+    av_buffer_unref(&results_data_ref);
+    if (own_slice_data_ref)
+        av_buffer_unref(&slice_data_ref);
+
+    /* Drop whatever was already attached to the unsubmitted execution */
+    ff_vk_exec_discard_deps(&fv->s, exec);
+
+    return err;
+}
+
+/*
+ * Create and initialise fv->intermediate_frames_ref, a Vulkan frames context
+ * of software format sw_format whose dimensions are the input frame
+ * dimensions rounded up to a multiple of 32.
+ *
+ * Returns 0 on success. On failure a negative error code is returned and
+ * fv->intermediate_frames_ref is left NULL.
+ */
+static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    AVHWFramesContext *hwfc;
+    AVVulkanFramesContext *vkfc;
+    int ret;
+
+    fv->intermediate_frames_ref = av_hwframe_ctx_alloc(fv->s.device_ref);
+    if (fv->intermediate_frames_ref == NULL)
+        return AVERROR(ENOMEM);
+
+    /* Generic frames context parameters */
+    hwfc = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
+    hwfc->width     = FFALIGN(fv->s.frames->width, 32);
+    hwfc->height    = FFALIGN(fv->s.frames->height, 32);
+    hwfc->sw_format = sw_format;
+    hwfc->format    = AV_PIX_FMT_VULKAN;
+
+    /* Vulkan-specific parameters: the images are used as storage images */
+    vkfc = hwfc->hwctx;
+    vkfc->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+    vkfc->usage     = VK_IMAGE_USAGE_STORAGE_BIT;
+    vkfc->tiling    = VK_IMAGE_TILING_OPTIMAL;
+
+    ret = av_hwframe_ctx_init(fv->intermediate_frames_ref);
+    if (ret >= 0)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
+           av_get_pix_fmt_name(sw_format), av_err2str(ret));
+    av_buffer_unref(&fv->intermediate_frames_ref);
+    return ret;
+}
+
+/**
+ * Check whether fmt is one of the software formats listed in constraints.
+ *
+ * @param constraints frame constraints; may be NULL, and its
+ *                    valid_sw_formats list may be NULL
+ * @param fmt         pixel format to look for
+ * @return 1 if fmt is listed, 0 otherwise (including when no list exists)
+ */
+static int check_support(AVHWFramesConstraints *constraints,
+                         enum AVPixelFormat fmt)
+{
+    if (!constraints || !constraints->valid_sw_formats)
+        return 0;
+
+    /* The list is terminated by AV_PIX_FMT_NONE, not by 0: 0 is itself a
+     * valid pixel format, so testing for a zero entry would both stop too
+     * early and run past the real terminator. */
+    for (int i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
+        if (constraints->valid_sw_formats[i] == fmt)
+            return 1;
+    }
+    return 0;
+}
+
+/**
+ * Pick a software format for the intermediate RGB frames.
+ *
+ * The preferred format depends on whether 32-bit samples and transparency
+ * are in use; if the device does not list it, wider formats are tried.
+ *
+ * @return the chosen format, or AV_PIX_FMT_NONE when the device constraints
+ *         cannot be queried or none of the candidates is supported
+ */
+static enum AVPixelFormat get_supported_rgb_buffer_fmt(AVCodecContext *avctx)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+
+    enum AVPixelFormat fmt;
+    AVHWFramesConstraints *constraints;
+    constraints = av_hwdevice_get_hwframe_constraints(fv->s.device_ref,
+                                                      NULL);
+    /* Without constraints nothing can be reported as supported */
+    if (!constraints)
+        return AV_PIX_FMT_NONE;
+
+    /* What we'd like to optimally have */
+    fmt = fv->ctx.use32bit ?
+          (fv->ctx.transparency ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGB96) :
+          (fv->ctx.transparency ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48);
+    if (check_support(constraints, fmt))
+        goto end;
+
+    /* Fallbacks: formats with an extra channel and/or wider samples */
+    if (fv->ctx.use32bit) {
+        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128)))
+            goto end;
+    } else {
+        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA64)))
+            goto end;
+
+        if (!fv->ctx.transparency &&
+            check_support(constraints, (fmt = AV_PIX_FMT_RGB96)))
+            goto end;
+
+        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128)))
+            goto end;
+    }
+
+    fmt = AV_PIX_FMT_NONE;
+
+end:
+    av_hwframe_constraints_free(&constraints);
+    return fmt;
+}
+
+/*
+ * Append the definitions and GLSL sources shared by the shaders to the
+ * source of shd: the CONTEXT_SIZE and MAX_QUANT_TABLE_MASK constants, the
+ * Golomb-Rice defines when that coder is selected, the sample types (16 or
+ * 32 bits wide depending on use32bit) and the common GLSL source files.
+ */
+static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
+{
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ int smp_bits = fv->ctx.use32bit ? 32 : 16;
+
+ av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
+
+ if (f->ac == AC_GOLOMB_RICE) {
+ av_bprintf(&shd->src, "#define PB_UNALIGNED\n" );
+ av_bprintf(&shd->src, "#define GOLOMB\n" );
+ }
+
+ /* Sample scalar and vector types used by the GLSL code */
+ GLSLF(0, #define TYPE int%i_t ,smp_bits);
+ GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
+ GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);
+ GLSLD(ff_source_common_comp);
+ GLSLD(ff_source_rangecoder_comp);
+
+ /* The VLC source is only included for the Golomb-Rice coder */
+ if (f->ac == AC_GOLOMB_RICE)
+ GLSLD(ff_source_ffv1_vlc_comp);
+
+ GLSLD(ff_source_ffv1_common_comp);
+}
+
+/*
+ * Build the setup shader (fv->setup): assemble its GLSL source, declare its
+ * descriptor sets and push constants, compile it with spv, link it and
+ * register it with fv->exec_pool.
+ *
+ * Returns a negative error code on failure, otherwise the non-negative
+ * result of the last checked call. The compiler's temporary data is
+ * released in both cases.
+ */
+static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFVulkanShader *shd = &fv->setup;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+
+ /* NOTE(review): "1, 1, 1" is presumably the workgroup size - confirm
+ * against ff_vk_shader_init() */
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_setup",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2" }, 2,
+ 1, 1, 1,
+ 0));
+
+ /* Array dimensions referenced by the quant_buf declaration below */
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ /* Constant buffers */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ { /* This descriptor is never used */
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0));
+
+ define_shared_code(avctx, shd);
+
+ /* Per-frame resources: slice contexts and the input image planes */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "slice_data_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .buf_content = "SliceContext slice_ctx[1024];",
+ },
+ {
+ .name = "src",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));
+
+ add_push_data(shd);
+
+ GLSLD(ff_source_ffv1_enc_setup_comp);
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail:
+ /* Reached on success as well */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+/*
+ * Build the reset shader (fv->reset): assemble its GLSL source, declare its
+ * descriptor sets and its push constants (matching FFv1VkResetParameters),
+ * compile it with spv, link it and register it with fv->exec_pool.
+ *
+ * Returns a negative error code on failure, otherwise the non-negative
+ * result of the last checked call. The compiler's temporary data is
+ * released in both cases.
+ */
+static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFVulkanShader *shd = &fv->reset;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+ /* Device limit on the first workgroup dimension, capped at 1024 */
+ int wg_dim = FFMIN(fv->s.props.properties.limits.maxComputeWorkGroupSize[0], 1024);
+
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_reset",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2" }, 2,
+ wg_dim, 1, 1,
+ 0));
+
+ /* Array dimensions referenced by the quant_buf declaration below */
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ /* Constant buffers */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ {
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0));
+
+ define_shared_code(avctx, shd);
+
+ /* Slice contexts, declared read-only for this shader */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "slice_data_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .buf_content = "SliceContext slice_ctx[1024];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0));
+
+ /* Keep the member order in sync with FFv1VkResetParameters */
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, u8buf slice_state; );
+ GLSLC(1, uint plane_state_size; );
+ GLSLC(1, uint context_count; );
+ GLSLC(1, uint8_t codec_planes; );
+ GLSLC(1, uint8_t key_frame; );
+ GLSLC(1, uint8_t padding[3]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ GLSLD(ff_source_ffv1_reset_comp);
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail:
+ /* Reached on success as well */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
+{ /* Set up the "ffv1_rct" compute shader (fv->rct): pick an intermediate
+ * pixel format, call init_indirect() for it, declare the descriptor
+ * bindings (slice data, readonly src images, writeonly dst images) and
+ * push constants, then compile, link and register the shader.
+ * Returns AVERROR(ENOTSUP) when no intermediate format is found,
+ * otherwise err. NOTE(review): RET() is defined outside this view; it
+ * presumably stores the call result in err and jumps to fail on error. */
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFVulkanShader *shd = &fv->rct;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* compiler handle, released at fail: when set */
+ int wg_count = sqrt(fv->s.props.properties.limits.maxComputeWorkGroupInvocations); /* side of a square workgroup; the sqrt result is truncated to int */
+
+ enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx);
+ if (intermediate_fmt == AV_PIX_FMT_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible "
+ "pixel format for RCT buffer!\n");
+ return AVERROR(ENOTSUP); /* nothing has been allocated yet, so a direct return is fine */
+ }
+
+ RET(init_indirect(avctx, intermediate_fmt)); /* defined elsewhere; presumably creates the intermediate frames used as dst - confirm */
+
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2" }, 2,
+ wg_count, wg_count, 1,
+ 0));
+
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); /* export the C-side table dimensions to the GLSL source */
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ desc_set = (FFVulkanDescriptorSetBinding []) { /* first set: constant lookup tables as uniform buffers */
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ {
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); /* 2 bindings; NOTE(review): meaning of the trailing 1, 0 is not shown here - confirm against the helper */
+
+ define_shared_code(avctx, shd); /* defined elsewhere in this file; presumably emits the shared GLSL declarations (e.g. SliceContext) used below - confirm */
+
+ desc_set = (FFVulkanDescriptorSetBinding []) { /* second set: slice data plus source and destination images */
+ {
+ .name = "slice_data_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .buf_content = "SliceContext slice_ctx[1024];",
+ },
+ {
+ .name = "src", /* input frame planes, in the input frames' sw_format */
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ {
+ .name = "dst", /* output planes, in the intermediate format chosen above */
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(intermediate_fmt,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(intermediate_fmt),
+ .mem_quali = "writeonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
+
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); /* GLSL view of the push constants; registered size below is sizeof(FFv1VkRCTParameters), so the two layouts must agree */
+ GLSLC(1, int offset; );
+ GLSLC(1, uint8_t planar_rgb; );
+ GLSLC(1, uint8_t transparency; );
+ GLSLC(1, uint8_t padding[2]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ GLSLD(ff_source_ffv1_enc_rct_comp); /* append the RCT shader body after all declarations */
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail: /* also reached by falling through on success */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
+{ /* Set up the main "ffv1_enc" compute shader (fv->enc): declare the
+ * table/CRC uniform buffers, the slice, source-image and results
+ * bindings, add the push constants, assemble the GLSL body from the
+ * coder- and colourspace-specific sources, then compile, link and
+ * register it. Returns err. NOTE(review): RET() is defined outside this
+ * view; it presumably stores the call result in err and jumps to fail
+ * on error. */
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ FFVulkanShader *shd = &fv->enc;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ AVHWFramesContext *frames_ctx = fv->intermediate_frames_ref ? /* read from the intermediate frames when they exist, else from the input frames */
+ (AVHWFramesContext *)fv->intermediate_frames_ref->data :
+ fv->s.frames;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* compiler handle, released at fail: when set */
+
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_enc",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2" }, 2,
+ 1, 1, 1,
+ 0)); /* workgroup size is 1x1x1 */
+
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); /* export the C-side table dimensions to the GLSL source */
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ desc_set = (FFVulkanDescriptorSetBinding []) { /* first set: constant lookup tables; binding order 0/1/2 matches the init_*_data() descriptor updates */
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ {
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ {
+ .name = "crc_ieee_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint32_t crc_ieee[256];",
+ },
+ };
+
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 1, 0)); /* 3 bindings; NOTE(review): meaning of the trailing 1, 0 is not shown here - confirm against the helper */
+
+ define_shared_code(avctx, shd); /* defined elsewhere in this file; presumably emits the shared GLSL declarations (e.g. SliceContext) used below - confirm */
+
+ desc_set = (FFVulkanDescriptorSetBinding []) { /* second set: per-frame data */
+ {
+ .name = "slice_data_buf", /* no mem_quali: unlike the reset/RCT shaders this one is not declared readonly */
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .buf_content = "SliceContext slice_ctx[1024];",
+ },
+ {
+ .name = "src",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(frames_ctx->sw_format),
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ {
+ .name = "results_data_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_quali = "writeonly",
+ .buf_content = "uint32_t slice_results[2048];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
+
+ add_push_data(shd); /* defined elsewhere in this file; presumably declares the shared push-constant block - confirm */
+
+ /* Assemble the shader body */
+ GLSLD(ff_source_ffv1_enc_common_comp);
+
+ if (f->ac == AC_GOLOMB_RICE) /* entropy coder: Golomb-Rice (VLC) or range coder (AC) */
+ GLSLD(ff_source_ffv1_enc_vlc_comp);
+ else
+ GLSLD(ff_source_ffv1_enc_ac_comp);
+
+ if (fv->is_rgb) /* slice driver: RGB variant or the regular one */
+ GLSLD(ff_source_ffv1_enc_rgb_comp);
+ else
+ GLSLD(ff_source_ffv1_enc_comp);
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail: /* also reached by falling through on success */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+/**
+ * Create and fill the 512-entry range coder state table and bind it to
+ * binding 0 of the first descriptor set of the setup and encode shaders.
+ *
+ * Entries 257..511 hold state_transition[1..255]; entries 255..1 hold
+ * 256 - state_transition[1..255]. Entries 0 and 256 are not written.
+ *
+ * @return the err value left by the last RET() call
+ */
+static int init_state_transition_data(AVCodecContext *avctx)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    const uint8_t *transition = fv->ctx.state_transition;
+    const VkBufferUsageFlags usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                                     VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+    const VkMemoryPropertyFlags mem_props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+    uint8_t *table;
+    int err;
+
+    RET(ff_vk_create_buf(&fv->s, &fv->rangecoder_static_buf,
+                         512 * sizeof(*table), NULL, NULL,
+                         usage, mem_props));
+    RET(ff_vk_map_buffer(&fv->s, &fv->rangecoder_static_buf, &table, 0));
+
+    /* Mirror the transition table around index 256 */
+    for (int state = 1; state < 256; state++) {
+        const int next = transition[state];
+
+        table[256 + state] = next;
+        table[256 - state] = 256 - next;
+    }
+
+    RET(ff_vk_unmap_buffer(&fv->s, &fv->rangecoder_static_buf, 1));
+
+    /* Bind the table for the setup shader, then for the encode shader */
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+fail:
+    return err;
+}
+
+/**
+ * Create the quantization table buffer, copy fv->ctx.quant_tables into
+ * it and bind it to binding 1 of the encode shader's first descriptor
+ * set.
+ *
+ * @return the err value left by the last RET() call
+ */
+static int init_quant_table_data(AVCodecContext *avctx)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    const VkBufferUsageFlags usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                                     VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+    const VkMemoryPropertyFlags mem_props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+    /* Sized from the table limits, matching the GLSL declaration */
+    const size_t table_bytes = MAX_QUANT_TABLES*
+                               MAX_CONTEXT_INPUTS*
+                               MAX_QUANT_TABLE_SIZE*sizeof(int16_t);
+    int16_t *tables;
+    int err;
+
+    RET(ff_vk_create_buf(&fv->s, &fv->quant_buf,
+                         table_bytes, NULL, NULL,
+                         usage, mem_props));
+    RET(ff_vk_map_buffer(&fv->s, &fv->quant_buf, (void *)&tables, 0));
+
+    memcpy(tables, fv->ctx.quant_tables, sizeof(fv->ctx.quant_tables));
+
+    RET(ff_vk_unmap_buffer(&fv->s, &fv->quant_buf, 1));
+
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 0, 1, 0,
+                                        &fv->quant_buf,
+                                        0, fv->quant_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+fail:
+    return err;
+}
+
+/**
+ * Create the CRC lookup buffer, fill it with the first 256 entries of
+ * the AV_CRC_32_IEEE table and bind it to binding 2 of the encode
+ * shader's first descriptor set.
+ *
+ * @return the err value left by the last RET() call
+ */
+static int init_crc_table_data(AVCodecContext *avctx)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    const VkBufferUsageFlags usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                                     VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+    const VkMemoryPropertyFlags mem_props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+    uint32_t *crc_table;
+    const size_t table_bytes = 256 * sizeof(*crc_table);
+    int err;
+
+    RET(ff_vk_create_buf(&fv->s, &fv->crc_tab_buf,
+                         table_bytes, NULL, NULL,
+                         usage, mem_props));
+    RET(ff_vk_map_buffer(&fv->s, &fv->crc_tab_buf, (void *)&crc_table, 0));
+
+    memcpy(crc_table, av_crc_get_table(AV_CRC_32_IEEE), table_bytes);
+
+    RET(ff_vk_unmap_buffer(&fv->s, &fv->crc_tab_buf, 1));
+
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 0, 2, 0,
+                                        &fv->crc_tab_buf,
+                                        0, fv->crc_tab_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+fail:
+    return err;
+}
+
+static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
+{ /* Encoder init callback: run the common FFv1 setup, validate the
+ * requested configuration, choose the slice layout, write extradata,
+ * bring up Vulkan (queue family, exec pool, SPIR-V compiler), build the
+ * setup/reset/(RCT)/encode shaders and upload the constant tables.
+ * Returns 0 on success or a negative error code. Nothing is released
+ * here on failure except the SPIR-V compiler; the codec sets
+ * FF_CODEC_CAP_INIT_CLEANUP, so presumably the close callback handles
+ * the rest - confirm. */
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ FFVkSPIRVCompiler *spv;
+
+ if ((err = ff_ffv1_common_init(avctx)) < 0)
+ return err;
+
+ if (f->ac == 1) /* map coder value 1 onto the custom-table range coder */
+ f->ac = AC_RANGE_CUSTOM_TAB;
+
+ err = ff_ffv1_encode_setup_plane_info(avctx, avctx->sw_pix_fmt);
+ if (err < 0)
+ return err;
+
+ /* Target version 3 by default */
+ f->version = 3;
+
+ err = ff_ffv1_encode_init(avctx);
+ if (err < 0)
+ return err;
+
+ /* Rice coding did not support high bit depths */
+ if (f->bits_per_raw_sample > (f->version > 3 ? 16 : 8)) { /* limit is 16 bits above version 3, else 8 */
+ if (f->ac == AC_GOLOMB_RICE) {
+ av_log(avctx, AV_LOG_WARNING, "bits_per_raw_sample > 8, "
+ "forcing range coder\n");
+ f->ac = AC_RANGE_CUSTOM_TAB;
+ }
+ }
+
+ if (f->version < 4 && avctx->gop_size > 1) {
+ av_log(avctx, AV_LOG_ERROR, "Using inter frames requires version 4 (-level 4)\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (f->version == 4 && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+ av_log(avctx, AV_LOG_ERROR, "Version 4 is experimental and requires -strict -2\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ //if (fv->ctx.ac == AC_GOLOMB_RICE) {
+ if (0) { /* disabled: automatic slice layout, originally conditioned on Golomb-Rice (see the commented-out test above) */
+ int w_a = FFALIGN(avctx->width, LG_ALIGN_W);
+ int h_a = FFALIGN(avctx->height, LG_ALIGN_H);
+ int w_sl, h_sl;
+
+ /* Pixels per line an invocation handles */
+ int ppi = 0;
+ /* Chunk size */
+ int chunks = 0;
+
+ do { /* grow the slice size until w_sl*h_sl fits in MAX_SLICES */
+ if (ppi < 2)
+ ppi++;
+ chunks++;
+ w_sl = w_a / (LG_ALIGN_W*ppi);
+ h_sl = h_a / (LG_ALIGN_H*chunks);
+ } while (w_sl > MAX_SLICES / h_sl);
+
+ av_log(avctx, AV_LOG_VERBOSE, "Slice config: %ix%i, %i total\n",
+ LG_ALIGN_W*ppi, LG_ALIGN_H*chunks, w_sl*h_sl);
+ av_log(avctx, AV_LOG_VERBOSE, "Horizontal slices: %i (%i pixels per invoc)\n",
+ w_sl, ppi);
+ av_log(avctx, AV_LOG_VERBOSE, "Vertical slices: %i (%i chunks)\n",
+ h_sl, chunks);
+
+ f->num_h_slices = w_sl;
+ f->num_v_slices = h_sl;
+
+ fv->ppi = ppi;
+ fv->chunks = chunks;
+ } else { /* user-selected slice grid; unset or non-positive values default to 32 per axis */
+ f->num_h_slices = fv->num_h_slices;
+ f->num_v_slices = fv->num_v_slices;
+
+ if (f->num_h_slices <= 0)
+ f->num_h_slices = 32;
+ if (f->num_v_slices <= 0)
+ f->num_v_slices = 32;
+
+ f->num_h_slices = FFMIN(f->num_h_slices, avctx->width); /* never more slices than pixels along an axis */
+ f->num_v_slices = FFMIN(f->num_v_slices, avctx->height);
+ }
+
+ if ((err = ff_ffv1_write_extradata(avctx)) < 0)
+ return err;
+
+ if (f->version < 4) { /* below version 4, subsampled axes must be a multiple of 64 << chroma shift */
+ if (((f->chroma_h_shift > 0) && (avctx->width % (64 << f->chroma_h_shift))) ||
+ ((f->chroma_v_shift > 0) && (avctx->height % (64 << f->chroma_v_shift)))) {
+ av_log(avctx, AV_LOG_ERROR, "Encoding frames with subsampling and unaligned "
+ "dimensions is only supported in version 4 (-level 4)\n");
+ return AVERROR_PATCHWELCOME;
+ }
+ }
+
+ if (fv->force_pcm) { /* PCM mode needs version 4 and the range coder */
+ if (f->version < 4) {
+ av_log(avctx, AV_LOG_ERROR, "PCM coding only supported by version 4 (-level 4)\n");
+ return AVERROR_INVALIDDATA;
+ } else if (f->ac != AC_RANGE_CUSTOM_TAB) {
+ av_log(avctx, AV_LOG_ERROR, "PCM coding requires range coding\n");
+ return AVERROR_INVALIDDATA;
+ }
+ }
+
+ /* Init Vulkan */
+ err = ff_vk_init(&fv->s, avctx, NULL, avctx->hw_frames_ctx);
+ if (err < 0)
+ return err;
+
+ err = ff_vk_qf_init(&fv->s, &fv->qf, VK_QUEUE_COMPUTE_BIT);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n");
+ return err;
+ }
+
+ err = ff_vk_exec_pool_init(&fv->s, &fv->qf, &fv->exec_pool,
+ fv->qf.nb_queues*4,
+ 0, 0, 0, NULL); /* four execution contexts per queue */
+ if (err < 0)
+ return err;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ /* Detect the special RGB coding mode */
+ fv->is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) &&
+ !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8); /* i.e. colorspace != 0 and the format is not YA8 */
+
+ /* bits_per_raw_sample use regular unsigned representation,
+ * but in higher bit depths, the data is cast to int16_t */
+ fv->rep_fmt = FF_VK_REP_UINT;
+ if (!fv->is_rgb && f->bits_per_raw_sample > 8)
+ fv->rep_fmt = FF_VK_REP_INT;
+
+ /* Init setup shader */
+ err = init_setup_shader(avctx, spv);
+ if (err < 0) {
+ spv->uninit(&spv); /* from here on every exit path must release the compiler */
+ return err;
+ }
+
+ /* Init reset shader */
+ err = init_reset_shader(avctx, spv);
+ if (err < 0) {
+ spv->uninit(&spv);
+ return err;
+ }
+
+ /* Init RCT shader */
+ if (fv->is_rgb) {
+ err = init_rct_shader(avctx, spv);
+ if (err < 0) {
+ spv->uninit(&spv);
+ return err;
+ }
+ }
+
+ /* Encode shader */
+ err = init_encode_shader(avctx, spv); /* must follow the RCT shader: it reads fv->intermediate_frames_ref when that is set */
+ if (err < 0) {
+ spv->uninit(&spv);
+ return err;
+ }
+
+ spv->uninit(&spv); /* all shaders are built; the compiler is no longer needed */
+
+ /* Range coder data */
+ err = init_state_transition_data(avctx);
+ if (err < 0)
+ return err;
+
+ /* Quantization table data */
+ err = init_quant_table_data(avctx);
+ if (err < 0)
+ return err;
+
+ /* CRC table buffer */
+ err = init_crc_table_data(avctx);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
+{ /* Encoder close callback: release everything owned by the private
+ * context - exec pool, shaders, frame and buffer pools, the constant
+ * table buffers - and finally the Vulkan context itself.
+ * Always returns 0. */
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+
+ ff_vk_exec_pool_free(&fv->s, &fv->exec_pool); /* freed before the shaders that were registered with it */
+
+ ff_vk_shader_free(&fv->s, &fv->enc);
+ ff_vk_shader_free(&fv->s, &fv->rct);
+ ff_vk_shader_free(&fv->s, &fv->reset);
+ ff_vk_shader_free(&fv->s, &fv->setup);
+
+ av_buffer_unref(&fv->intermediate_frames_ref);
+
+ av_buffer_pool_uninit(&fv->results_data_pool);
+
+ av_buffer_pool_uninit(&fv->out_data_pool);
+ av_buffer_pool_uninit(&fv->tmp_data_pool);
+
+ av_buffer_unref(&fv->keyframe_slice_data_ref); /* dropped before the pool below; presumably it was allocated from that pool - confirm */
+ av_buffer_pool_uninit(&fv->slice_data_pool);
+
+ ff_vk_free_buf(&fv->s, &fv->quant_buf);
+ ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf);
+ ff_vk_free_buf(&fv->s, &fv->crc_tab_buf);
+
+ ff_vk_uninit(&fv->s); /* last: every call above still takes &fv->s */
+
+ return 0;
+}
+
+#define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x) /* byte offset of an option's backing field in the private context */
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM /* flags shared by every option below */
+static const AVOption vulkan_encode_ffv1_options[] = { /* private encoder options; each entry is name, help, field offset, type, default, min, max, flags */
+ { "slicecrc", "Protect slices with CRCs", OFFSET(ctx.ec), AV_OPT_TYPE_BOOL,
+ { .i64 = -1 }, -1, 1, VE }, /* default -1 */
+ { "context", "Context model", OFFSET(ctx.context_model), AV_OPT_TYPE_INT,
+ { .i64 = 0 }, 0, 1, VE },
+ { "coder", "Coder type", OFFSET(ctx.ac), AV_OPT_TYPE_INT,
+ { .i64 = AC_RANGE_CUSTOM_TAB }, -2, 2, VE, .unit = "coder" }, /* named values are the two "coder"-unit constants that follow */
+ { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST,
+ { .i64 = AC_GOLOMB_RICE }, INT_MIN, INT_MAX, VE, .unit = "coder" },
+ { "range_tab", "Range with custom table", 0, AV_OPT_TYPE_CONST,
+ { .i64 = AC_RANGE_CUSTOM_TAB }, INT_MIN, INT_MAX, VE, .unit = "coder" },
+ { "qtable", "Quantization table", OFFSET(ctx.qtable), AV_OPT_TYPE_INT,
+ { .i64 = -1 }, -1, 2, VE },
+
+ { "slices_h", "Number of horizontal slices", OFFSET(num_h_slices), AV_OPT_TYPE_INT,
+ { .i64 = -1 }, -1, 32, VE }, /* values <= 0 are replaced by 32 in the init callback */
+ { "slices_v", "Number of vertical slices", OFFSET(num_v_slices), AV_OPT_TYPE_INT,
+ { .i64 = -1 }, -1, 32, VE }, /* values <= 0 are replaced by 32 in the init callback */
+
+ { "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL,
+ { .i64 = 0 }, 0, 1, VE }, /* the init callback rejects this below version 4 or without the range coder */
+
+ { NULL } /* terminator */
+};
+
+static const FFCodecDefault vulkan_encode_ffv1_defaults[] = { /* codec-level option defaults, referenced by the codec descriptor's .defaults */
+ { "g", "1" }, /* "g" = 1; the init callback rejects gop_size > 1 below version 4 */
+ { NULL }, /* terminator */
+};
+
+static const AVClass vulkan_encode_ffv1_class = { /* AVClass for the private context; exposes vulkan_encode_ffv1_options */
+ .class_name = "ffv1_vulkan",
+ .item_name = av_default_item_name,
+ .option = vulkan_encode_ffv1_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+/* Hardware configurations accepted by this encoder: Vulkan frames only.
+ * File-local: only the codec descriptor in this file refers to the table,
+ * so it gets internal linkage rather than exporting an unprefixed symbol. */
+static const AVCodecHWConfigInternal *const vulkan_encode_ffv1_hw_configs[] = {
+    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
+    NULL, /* terminator */
+};
+
+const FFCodec ff_ffv1_vulkan_encoder = { /* codec descriptor for the "ffv1_vulkan" encoder; wires up the init/encode/close callbacks defined in this file */
+ .p.name = "ffv1_vulkan",
+ CODEC_LONG_NAME("FFmpeg video codec #1 (Vulkan)"),
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_FFV1,
+ .priv_data_size = sizeof(VulkanEncodeFFv1Context),
+ .init = &vulkan_encode_ffv1_init,
+ FF_CODEC_ENCODE_CB(vulkan_encode_ffv1_frame),
+ .close = &vulkan_encode_ffv1_close,
+ .p.priv_class = &vulkan_encode_ffv1_class,
+ .p.capabilities = AV_CODEC_CAP_DELAY |
+ AV_CODEC_CAP_HARDWARE |
+ AV_CODEC_CAP_DR1 |
+ AV_CODEC_CAP_ENCODER_FLUSH |
+ AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH, /* the init callback relies on INIT_CLEANUP: it does not free partial state itself */
+ .defaults = vulkan_encode_ffv1_defaults,
+ .p.pix_fmts = (const enum AVPixelFormat[]) { /* only Vulkan hardware frames are accepted as input */
+ AV_PIX_FMT_VULKAN,
+ AV_PIX_FMT_NONE,
+ },
+ .hw_configs = vulkan_encode_ffv1_hw_configs,
+ .p.wrapper_name = "vulkan",
+};
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 96b4de0092..351332ee44 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -3,6 +3,14 @@ GEN_CLEANSUFFIXES = *.o *.c *.d
clean::
$(RM) $(GEN_CLEANSUFFIXES:%=libavcodec/vulkan/%)
+OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
+ vulkan/rangecoder.o vulkan/ffv1_vlc.o \
+ vulkan/ffv1_common.o vulkan/ffv1_reset.o \
+ vulkan/ffv1_enc_common.o \
+ vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \
+ vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
+ vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o # one object per .comp shader source; the .c files come from the %.c rule below
+
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
new file mode 100644
index 0000000000..deca5d63b1
--- /dev/null
+++ b/libavcodec/vulkan/common.comp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+layout(buffer_reference, buffer_reference_align = 1) buffer u8buf { /* buffer reference to 8-bit data, 1-byte alignment */
+ uint8_t v;
+};
+
+layout(buffer_reference, buffer_reference_align = 2) buffer u16buf { /* buffer reference to 16-bit data, 2-byte alignment */
+ uint16_t v;
+};
+
+layout(buffer_reference, buffer_reference_align = 4) buffer u32buf { /* buffer reference to 32-bit data, 4-byte alignment */
+ uint32_t v;
+};
+
+layout(buffer_reference, buffer_reference_align = 8) buffer u64buf { /* buffer reference to 64-bit data, 8-byte alignment */
+ uint64_t v;
+};
+
+#define OFFBUF(type, b, l) \
+ type(uint64_t(b) + uint64_t(l)) /* buffer reference of the given type at address b plus l bytes */
+
+#define zero_extend(a, p) \
+ ((a) & ((1 << (p)) - 1)) /* keep the low p bits of a; 1 << p limits p to less than the int width */
+
+#define sign_extend(val, bits) \
+ bitfieldExtract(val, 0, bits) /* low `bits` bits of val; bitfieldExtract sign-extends for signed val and zero-extends for unsigned */
+
+#define fold(diff, bits) \
+ sign_extend(diff, bits) /* wrap a difference into a `bits`-wide value */
+
+#define mid_pred(a, b, c) \
+ max(min((a), (b)), min(max((a), (b)), (c))) /* median of three; arguments are evaluated more than once */
+
+/* TODO: optimize */
+/* Round src up to the next multiple of a; a must be non-zero. */
+uint align(uint src, uint a)
+{
+    const uint rem = src % a;
+    return (rem != 0) ? (src + a - rem) : src;
+}
+
+/* TODO: optimize */
+/* 64-bit variant: round src up to the next multiple of a; a must be non-zero. */
+uint64_t align64(uint64_t src, uint64_t a)
+{
+    const uint64_t rem = src % a;
+    return (rem != 0) ? (src + a - rem) : src;
+}
+
+/* Byte-swap a 32-bit value. unpack8() puts the least significant byte in
+ * .x, so a full reversal is the .wzyx swizzle (.wzxy would leave the two
+ * low bytes in the wrong order). */
+#define reverse4(src) \
+    (pack32(unpack8(uint32_t(src)).wzyx))
+
+/* Byte-swap a 64-bit value: reverse the bytes of each 32-bit half, then
+ * exchange the halves. */
+uint64_t reverse8(uint64_t src)
+{
+    u32vec2 tmp = unpack32(src);
+    tmp.x = reverse4(tmp.x);
+    tmp.y = reverse4(tmp.y);
+    return pack64(tmp.yx);
+}
+
+#ifdef PB_32 /* 32-bit bit accumulator */
+#define BIT_BUF_TYPE uint32_t
+#define BUF_TYPE u32buf
+#define BUF_REVERSE(src) reverse4(src)
+#define BUF_BITS uint8_t(32)
+#define BUF_BYTES uint8_t(4)
+#define BYTE_EXTRACT(src, byte_off) \
+ (uint8_t(bitfieldExtract((src), ((byte_off) << 3), 8))) /* byte number byte_off of src, 0 = least significant */
+#else /* default: 64-bit bit accumulator */
+#define BIT_BUF_TYPE uint64_t
+#define BUF_TYPE u64buf
+#define BUF_REVERSE(src) reverse8(src)
+#define BUF_BITS uint8_t(64)
+#define BUF_BYTES uint8_t(8)
+#define BYTE_EXTRACT(src, byte_off) \
+ (uint8_t(((src) >> ((byte_off) << 3)) & 0xFF)) /* byte number byte_off of src, 0 = least significant */
+#endif
+
+struct PutBitContext { /* bit writer state used by put_bits() and flush_put_bits() */
+ uint64_t buf_start; /* address of the first output byte */
+ uint64_t buf; /* address of the next output byte to write */
+
+ BIT_BUF_TYPE bit_buf; /* accumulator for bits not yet written out */
+ uint8_t bit_left; /* free bits remaining in bit_buf; BUF_BITS when empty */
+};
+
+void put_bits(inout PutBitContext pb, const uint32_t n, uint32_t value)
+{ /* Append the n-bit value to the stream. The accumulator is written to
+ * memory most significant byte first once it is full.
+ * NOTE(review): value is OR-ed in unmasked, so callers presumably pass
+ * values that fit in n bits - confirm. */
+ if (n < pb.bit_left) { /* fits with room to spare: shift in, nothing is written */
+ pb.bit_buf = (pb.bit_buf << n) | value;
+ pb.bit_left -= uint8_t(n);
+ } else { /* fills the accumulator: top part of value completes it, then it is stored */
+ pb.bit_buf <<= pb.bit_left;
+ pb.bit_buf |= (value >> (n - pb.bit_left));
+
+#ifdef PB_UNALIGNED /* byte-wise store, no alignment requirement on pb.buf */
+ u8buf bs = u8buf(pb.buf);
+ [[unroll]]
+ for (uint8_t i = uint8_t(0); i < BUF_BYTES; i++)
+ bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);
+#else /* whole-word store; pb.buf must be BUF_BYTES-aligned */
+#ifdef DEBUG
+ if ((pb.buf % BUF_BYTES) != 0)
+ debugPrintfEXT("put_bits buffer is not aligned!");
+#endif
+
+ BUF_TYPE bs = BUF_TYPE(pb.buf);
+ bs.v = BUF_REVERSE(pb.bit_buf);
+#endif
+ pb.buf = uint64_t(bs) + BUF_BYTES;
+
+ pb.bit_left += BUF_BITS - uint8_t(n); /* free space after the leftover low bits of value */
+ pb.bit_buf = value; /* leftover bits start the next accumulator word */
+ }
+}
+
+/* Write out the bits still held in the accumulator, padded with zero bits
+ * up to the next byte boundary, and reset the accumulator.
+ * Only the bytes that actually hold pending bits are written, and the
+ * write pointer advances by exactly that many bytes.
+ * Returns the total number of bytes written since init_put_bits(). */
+uint32_t flush_put_bits(inout PutBitContext pb)
+{
+    if (pb.bit_left < BUF_BITS) {
+        /* Align bits to MSBs */
+        pb.bit_buf <<= pb.bit_left;
+
+        /* Pending bits rounded up to whole bytes (1..BUF_BYTES) */
+        const uint pending_bits = uint(BUF_BITS) - uint(pb.bit_left);
+        const uint to_write = (pending_bits + 7u) >> 3;
+
+        /* Most significant byte first, matching put_bits() */
+        u8buf bs = u8buf(pb.buf);
+        for (int i = 0; i < to_write; i++)
+            bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);
+        pb.buf = uint64_t(bs) + to_write;
+    }
+
+    pb.bit_left = BUF_BITS;
+    pb.bit_buf = 0x0;
+
+    return uint32_t(pb.buf - pb.buf_start);
+}
+
+/* Start a bit writer at the address of data with an empty accumulator.
+ * len is accepted but not used. */
+void init_put_bits(out PutBitContext pb, u8buf data, uint64_t len)
+{
+    const uint64_t base = uint64_t(data);
+
+    /* Empty accumulator: every bit is free */
+    pb.bit_left = BUF_BITS;
+    pb.bit_buf = 0;
+
+    /* Write position starts at the beginning of the buffer */
+    pb.buf_start = base;
+    pb.buf = base;
+}
+
+/* Total bits emitted so far: bytes already stored plus the bits pending
+ * in the accumulator. */
+uint64_t put_bits_count(in PutBitContext pb)
+{
+    const uint64_t stored_bits = (pb.buf - pb.buf_start)*8;
+    return stored_bits + BUF_BITS - pb.bit_left;
+}
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
new file mode 100644
index 0000000000..5b4a882367
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -0,0 +1,74 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+struct SliceContext { /* per-slice encoder state; the shaders declare it as slice_ctx[1024] */
+ RangeCoder c; /* range coder state (type defined outside this view) */
+
+#ifdef GOLOMB
+ PutBitContext pb; /* 8*8 bytes */
+#endif
+
+ ivec2 slice_dim; /* slice width (.x) and height (.y) as used by the encode loops */
+ ivec2 slice_pos; /* slice origin; added to per-slice coordinates before image loads */
+ ivec2 slice_rct_coef;
+
+ uint hdr_len; // only used for golomb
+ int slice_coding_mode; /* 1 selects the PCM path in encode_slice() */
+};
+
+/* -1, { -1, 0 } */
+/* Median predictor over the left sample, the top sample and the gradient
+ * left + top - top-left. */
+int predict(int L, ivec2 top)
+{
+    const int top_left = top[0];
+    const int above    = top[1];
+    const int gradient = L + above - top_left;
+
+    return mid_pred(L, gradient, above);
+}
+
+/* { -2, -1 }, { -1, 0, 1 }, 0 */
+/* Context index for the current sample: the sum of quantized neighbour
+ * differences. Tables 3 and 4 (two samples left, two rows up) only
+ * contribute when one of them is non-zero at index 127. */
+int get_context(VTYPE2 cur_l, VTYPE3 top_l, TYPE top2, uint8_t quant_table_idx)
+{
+    const int left      = cur_l[1]; /* x - 1, current row */
+    const int top_left  = top_l[0]; /* x - 1, row above   */
+    const int above     = top_l[1]; /* x,     row above   */
+    const int top_right = top_l[2]; /* x + 1, row above   */
+
+    int ctx = quant_table[quant_table_idx][0][(left - top_left) & MAX_QUANT_TABLE_MASK] +
+              quant_table[quant_table_idx][1][(top_left - above) & MAX_QUANT_TABLE_MASK] +
+              quant_table[quant_table_idx][2][(above - top_right) & MAX_QUANT_TABLE_MASK];
+
+    const bool wide_context = (quant_table[quant_table_idx][3][127] != 0) ||
+                              (quant_table[quant_table_idx][4][127] != 0);
+    if (wide_context) {
+        const int above2 = top2;     /* x,     two rows above */
+        const int left2  = cur_l[0]; /* x - 2, current row    */
+
+        ctx = ctx +
+              quant_table[quant_table_idx][3][(left2 - left) & MAX_QUANT_TABLE_MASK] +
+              quant_table[quant_table_idx][4][(above2 - above) & MAX_QUANT_TABLE_MASK];
+    }
+
+    return ctx;
+}
+
+const uint32_t log2_run[41] = { /* 41-entry non-decreasing table; presumably indexed by run_index in the Golomb-Rice run coder (not visible here) - confirm */
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24,
+};
diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp
new file mode 100644
index 0000000000..880d3a37f0
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc.comp
@@ -0,0 +1,67 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void encode_slice(inout SliceContext sc, const uint slice_idx)
+{ /* Encode every plane of one slice line by line, then finalize it.
+ * Without GOLOMB, slice_coding_mode == 1 selects raw PCM coding;
+ * otherwise each line goes through encode_line() using the slice's
+ * region of the state buffer. */
+ int bits = bits_per_raw_sample;
+
+#ifndef GOLOMB
+ if (sc.slice_coding_mode == 1) { /* PCM: no prediction, no state */
+ for (int p = 0; p < planes; p++) {
+
+ int h = sc.slice_dim.y;
+ if (p > 0 && p < 3) /* planes 1 and 2 are vertically subsampled */
+ h >>= chroma_shift.y;
+
+ for (int y = 0; y < h; y++)
+ encode_line_pcm(sc, y, p, 0, bits);
+ }
+ } else
+#endif /* when GOLOMB is defined the block below runs unconditionally */
+ {
+ uint64_t slice_state_off = uint64_t(slice_state) +
+ slice_idx*plane_state_size*codec_planes; /* start of this slice's state */
+
+ for (int p = 0; p < planes; p++) {
+ int run_index = 0; /* restarted for every plane */
+
+ int h = sc.slice_dim.y;
+ if (p > 0 && p < 3) /* planes 1 and 2 are vertically subsampled */
+ h >>= chroma_shift.y;
+
+ for (int y = 0; y < h; y++)
+ encode_line(sc, slice_state_off, y, p, 0, bits, run_index);
+
+ /* For the second chroma plane, reuse the first plane's state */
+ if (p != 1)
+ slice_state_off += plane_state_size;
+ }
+ }
+
+ finalize_slice(sc, slice_idx); /* defined in the coder-specific source, outside this view */
+}
+
+/* Entry point: one workgroup per slice, numbered row-major over the
+ * dispatch grid. */
+void main(void)
+{
+    const uvec2 wg = gl_WorkGroupID.xy;
+    const uint slice_idx = wg.y*gl_NumWorkGroups.x + wg.x;
+
+    encode_slice(slice_ctx[slice_idx], slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_enc_ac.comp b/libavcodec/vulkan/ffv1_enc_ac.comp
new file mode 100644
index 0000000000..0bbf58c5dd
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_ac.comp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Encode one binary decision with the adaptive state at address state,
+ * renormalizing the coder once its range drops below 0x100. */
+void put_rac(inout RangeCoder c, uint64_t state, bool bit)
+{
+    put_rac_norenorm(c, state, bit);
+
+    const bool range_too_small = (c.range < 0x100);
+    if (range_too_small) {
+        renorm_encoder(c);
+    }
+}
+
+/* Note - only handles signed values */
+void put_symbol(inout RangeCoder c, uint64_t state, int v)
+{ /* Code v as: a zero flag, a unary exponent, the mantissa bits below
+ * the leading one, and a sign bit. state is the address of this
+ * symbol's context; the offsets below select individual states. */
+ bool is_nil = (v == 0);
+ put_rac(c, state, is_nil); /* offset 0: zero flag */
+ if (is_nil)
+ return;
+
+ const int a = abs(v);
+ const int e = findMSB(a); /* position of the leading one bit */
+
+ state += 1;
+ for (int i = 0; i < e; i++) /* offsets 1..10: unary exponent, shared from index 9 up */
+ put_rac(c, state + min(i, 9), true);
+ put_rac(c, state + min(e, 9), false);
+
+ state += 21; /* state now points at offset 22 */
+ for (int i = e - 1; i >= 0; i--) /* offsets 22..31: mantissa bits, most significant first */
+ put_rac(c, state + min(i, 9), bool(bitfieldExtract(a, i, 1)));
+
+ put_rac(c, state - 11 + min(e, 10), v < 0); /* offsets 11..21: sign bit, chosen by exponent */
+}
+
+/* Write one line of plane p as raw samples: each sample's low `bits`
+ * bits go to the range coder as equiprobable bits, most significant
+ * first. Planes 1 and 2 use the chroma-subsampled width and origin. */
+void encode_line_pcm(inout SliceContext sc, int y, int p, int comp,
+                     int bits)
+{
+    const bool subsampled = (p > 0 && p < 3);
+
+    ivec2 origin = sc.slice_pos;
+    int width = sc.slice_dim.x;
+    if (subsampled) {
+        origin >>= chroma_shift;
+        width >>= chroma_shift.x;
+    }
+
+    for (int x = 0; x < width; x++) {
+        uint v = imageLoad(src[p], (origin + ivec2(x, y)))[comp];
+
+        for (int bit = (bits - 1); bit >= 0; bit--) {
+            put_rac_equi(sc.c, bool(bitfieldExtract(v, bit, 1)));
+        }
+    }
+}
+
+/* Range-code one line of plane p: for every sample, get_diff() yields
+ * the context index and the residual, and the residual is coded with
+ * the state block selected by that context. run_index is not used by
+ * this coder. */
+void encode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, const int run_index)
+{
+    const bool subsampled = (p > 0 && p < 3);
+
+    ivec2 origin = sc.slice_pos;
+    int width = sc.slice_dim.x;
+    if (subsampled) {
+        origin >>= chroma_shift;
+        width >>= chroma_shift.x;
+    }
+
+    for (int x = 0; x < width; x++) {
+        const ivec2 local = ivec2(x, y);
+        /* d[0] = context, d[1] = residual */
+        const ivec2 d = get_diff(origin + local, local, p, comp, width, bits);
+
+        put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]);
+    }
+}
diff --git a/libavcodec/vulkan/ffv1_enc_common.comp b/libavcodec/vulkan/ffv1_enc_common.comp
new file mode 100644
index 0000000000..759882f5c9
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_common.comp
@@ -0,0 +1,101 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Gather the already-coded neighbours of one sample and return
+ * ivec2(context, residual) for it.
+ *
+ * pos  - coordinates used for the image loads from src[p]
+ * off  - coordinates of the same sample relative to the slice origin; only
+ *        used to detect the slice borders
+ * comp - component of the loaded texel to use
+ * sw   - slice width in samples, used to clamp the top-right neighbour
+ * bits - forwarded to fold() for the residual
+ *
+ * A neighbour whose load is skipped by the guards below stays TYPE(0).
+ */
+ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
+{
+ /* On column 0 the left-hand neighbours are redirected one sample right and one row up */
+ const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
+ /* Same redirection for the neighbour two samples to the left when on column 1 */
+ const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
+
+ /* Sample two rows up, only from the third slice row onwards */
+ TYPE top2 = TYPE(0);
+ if (off.y > 1)
+ top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);
+
+ /* Row above: [0] top-left, [1] top, [2] top-right */
+ VTYPE3 top = VTYPE3(TYPE(0),
+ TYPE(0),
+ TYPE(0));
+ if (off.y > 0 && off != ivec2(0, 1))
+ top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+ if (off.y > 0) {
+ top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);
+ /* On the last column the x offset becomes 0, so top-right repeats top */
+ top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
+ }
+
+ /* Current row: [0] two to the left, [1] left, [2] the sample itself */
+ VTYPE3 cur = VTYPE3(TYPE(0),
+ TYPE(0),
+ imageLoad(src[p], pos)[comp]);
+ if (off.x > 0 && off != ivec2(1, 0))
+ cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
+ if (off != ivec2(0, 0))
+ cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
+
+ /* context, diff */
+ ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model),
+ cur[2] - predict(cur[1], VTYPE2(top)));
+
+ /* A negative context is mirrored: both the context and the residual change sign */
+ if (d[0] < 0)
+ d = -d;
+
+ d[1] = fold(d[1], bits);
+
+ return d;
+}
+
+/*
+ * Terminate the slice bitstream, append the slice trailer and publish the
+ * result.
+ *
+ * The trailer is three bytes taken from unpack8() of the payload length
+ * (written in .z, .y, .x order) and, when `ec` is non-zero, one zero byte
+ * followed by a CRC over everything written so far (stored in .x, .y, .z, .w
+ * order). slice_results receives the total length and the offset of the
+ * slice inside out_data.
+ */
+void finalize_slice(inout SliceContext sc, const uint slice_idx)
+{
+#ifdef GOLOMB
+    uint32_t total = sc.hdr_len + flush_put_bits(sc.pb);
+#else
+    uint32_t total = rac_terminate(sc.c);
+#endif
+
+    u8buf out_bytes = u8buf(sc.c.bytestream_start);
+
+    /* Append slice length */
+    const u8vec4 len_bytes = unpack8(total);
+    out_bytes[total++].v = len_bytes.z;
+    out_bytes[total++].v = len_bytes.y;
+    out_bytes[total++].v = len_bytes.x;
+
+    /* Calculate and write CRC */
+    if (ec != 0) {
+        out_bytes[total++].v = uint8_t(0);
+
+        uint32_t crc = crcref;
+        for (int i = 0; i < total; i++) {
+            const uint32_t tab_idx = (crc & 0xFF) ^ uint32_t(out_bytes[i].v);
+            crc = crc_ieee[tab_idx] ^ (crc >> 8);
+        }
+
+        if (crcref != 0x00000000)
+            crc ^= 0x8CD88196;
+
+        const u8vec4 crc_bytes = unpack8(crc);
+        out_bytes[total++].v = crc_bytes.x;
+        out_bytes[total++].v = crc_bytes.y;
+        out_bytes[total++].v = crc_bytes.z;
+        out_bytes[total++].v = crc_bytes.w;
+    }
+
+    const uint result_base = slice_idx*2;
+    slice_results[result_base + 0] = total;
+    slice_results[result_base + 1] = uint32_t(uint64_t(out_bytes) - uint64_t(out_data));
+}
diff --git a/libavcodec/vulkan/ffv1_enc_rct.comp b/libavcodec/vulkan/ffv1_enc_rct.comp
new file mode 100644
index 0000000000..ad4cbf805f
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_rct.comp
@@ -0,0 +1,82 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Fetch the components of the pixel at `pos` as one ivec4.
+ *
+ * Packed input (planar_rgb == 0): a single load from src[0].
+ * Planar input: the first component of each of the 3 planes (4 with
+ * transparency) is gathered and the result is swizzled before returning.
+ */
+ivec4 load_components(ivec2 pos)
+{
+    if (planar_rgb == 0)
+        return ivec4(imageLoad(src[0], pos));
+
+    /* Zero-fill so the 4th component is defined when there is no 4th plane */
+    ivec4 pix = ivec4(0);
+    for (int i = 0; i < (3 + transparency); i++)
+        pix[i] = int(imageLoad(src[i], pos)[0]);
+
+    /* Swizzle out the difference */
+    if (transparency > 0)
+        return pix.brga;
+    return pix.bgra;
+}
+
+/* Copy the pixel at `pos` to dst[0] without applying the colour transform. */
+void bypass_sample(ivec2 pos)
+{
+    const ivec4 texel = load_components(pos);
+    imageStore(dst[0], pos, texel);
+}
+
+/*
+ * Copy a whole slice untransformed. Each invocation starts at its local ID
+ * inside the slice and steps by the workgroup size in both directions.
+ */
+void bypass_block(in SliceContext sc)
+{
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+
+    for (uint row = first.y; row < limit.y; row += gl_WorkGroupSize.y) {
+        for (uint col = first.x; col < limit.x; col += gl_WorkGroupSize.x) {
+            const ivec2 pos = ivec2(col, row);
+            bypass_sample(pos);
+        }
+    }
+}
+
+/*
+ * Apply the colour transform to the pixel at `pos` and store it in dst[0]:
+ * b and r become differences against g (biased by `offset`), and g receives
+ * a quarter of the coefficient-weighted differences.
+ */
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{
+    const ivec4 rgba = load_components(pos);
+
+    const int diff_b = rgba.b - rgba.g;
+    const int diff_r = rgba.r - rgba.g;
+    const int mixed_g = rgba.g + ((diff_r*rct_coef.x + diff_b*rct_coef.y) >> 2);
+
+    ivec4 result = rgba;
+    result.r = diff_r + offset;
+    result.g = mixed_g;
+    result.b = diff_b + offset;
+    imageStore(dst[0], pos, result);
+}
+
+/*
+ * Colour-transform a whole slice with the slice's own coefficients. Each
+ * invocation starts at its local ID inside the slice and steps by the
+ * workgroup size in both directions.
+ */
+void transform_block(in SliceContext sc)
+{
+    const ivec2 coef = sc.slice_rct_coef;
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+
+    for (uint row = first.y; row < limit.y; row += gl_WorkGroupSize.y) {
+        for (uint col = first.x; col < limit.x; col += gl_WorkGroupSize.x) {
+            const ivec2 pos = ivec2(col, row);
+            transform_sample(pos, coef);
+        }
+    }
+}
+
+/* One workgroup per slice: copy slices in coding mode 1, transform the rest. */
+void main()
+{
+    const uint slice_idx = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;
+    const bool raw_slice = (slice_ctx[slice_idx].slice_coding_mode == 1);
+
+    if (!raw_slice) {
+        transform_block(slice_ctx[slice_idx]);
+        return;
+    }
+
+    bypass_block(slice_ctx[slice_idx]);
+}
diff --git a/libavcodec/vulkan/ffv1_enc_rgb.comp b/libavcodec/vulkan/ffv1_enc_rgb.comp
new file mode 100644
index 0000000000..c176d94e8b
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_rgb.comp
@@ -0,0 +1,83 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Encode every row of an RGB(A) slice and append the slice trailer.
+ *
+ * All rows are read from plane index 0; the components are coded in the
+ * order 1, 2, 0 and, with transparency, 3. Components 2 and 0 share the
+ * second state plane, component 3 uses the third.
+ *
+ * Slices in coding mode 1 are written raw with encode_line_pcm() (range
+ * coder builds only); all others go through encode_line().
+ */
+void encode_slice_rgb(inout SliceContext sc, const uint slice_idx)
+{
+    /*
+     * 8-bit input in coding mode 0 uses 9 bits; every other combination uses
+     * bits_per_raw_sample, plus one extra bit unless the slice is in mode 1.
+     * (Both paths yield 9 for 8-bit mode-0 slices.)
+     */
+    int bits = 9;
+    if (bits_per_raw_sample != 8 || sc.slice_coding_mode != 0)
+        bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
+
+    /* Carried across all encode_line() calls of the slice */
+    int run_index = 0;
+
+    const bool has_alpha = (transparency == 1);
+
+#ifndef GOLOMB
+    if (sc.slice_coding_mode == 1) {
+        for (int y = 0; y < sc.slice_dim.y; y++) {
+            encode_line_pcm(sc, y, 0, 1, bits);
+            encode_line_pcm(sc, y, 0, 2, bits);
+            encode_line_pcm(sc, y, 0, 0, bits);
+            if (has_alpha)
+                encode_line_pcm(sc, y, 0, 3, bits);
+        }
+    } else
+#endif
+    {
+        /* Start of this slice's adaptive state */
+        uint64_t slice_state_off = uint64_t(slice_state) +
+                                   slice_idx*plane_state_size*codec_planes;
+
+        for (int y = 0; y < sc.slice_dim.y; y++) {
+            encode_line(sc, slice_state_off + plane_state_size*0,
+                        y, 0, 1, bits, run_index);
+            encode_line(sc, slice_state_off + plane_state_size*1,
+                        y, 0, 2, bits, run_index);
+            encode_line(sc, slice_state_off + plane_state_size*1,
+                        y, 0, 0, bits, run_index);
+            if (has_alpha)
+                encode_line(sc, slice_state_off + plane_state_size*2,
+                            y, 0, 3, bits, run_index);
+        }
+    }
+
+    finalize_slice(sc, slice_idx);
+}
+
+/* One workgroup per slice; the slice index is the row-major workgroup index. */
+void main(void)
+{
+    const uint slice_row = gl_WorkGroupID.y;
+    const uint slice_col = gl_WorkGroupID.x;
+    const uint slice_idx = slice_row*gl_NumWorkGroups.x + slice_col;
+
+    encode_slice_rgb(slice_ctx[slice_idx], slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
new file mode 100644
index 0000000000..d58050f281
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -0,0 +1,151 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Return the coordinate at which slice number `sx` (out of `num_h_slices`)
+ * starts along an axis of `width` samples.
+ *
+ * Before version 4, and for version 4 with micro_version < 3, the axis is
+ * split proportionally. Otherwise boundaries are rounded to multiples of
+ * (1 << chroma_shift), and a boundary that lands on the aligned width is
+ * pulled back to the real width.
+ *
+ * Note: the `chroma_shift` parameter hides the global of the same name.
+ */
+uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
+{
+    const bool proportional = (version < 4) ||
+                              ((version == 4) && (micro_version < 3));
+    if (proportional)
+        return width * sx / num_h_slices;
+
+    const uint unit = 1 << chroma_shift;
+    const uint aligned_width = align(width, unit);
+
+    const uint edge = (2 * aligned_width * sx + num_h_slices * unit) /
+                      (2 * num_h_slices * unit) * unit;
+
+    return (edge == aligned_width) ? width : edge;
+}
+
+/*
+ * Set up the per-slice context: position and size of the slice inside
+ * src[0], default colour-transform coefficients, and a range coder writing
+ * to this slice's region of out_data.
+ *
+ * `sc` is inout rather than out: only some members are assigned here, and
+ * with `out` the remaining members (for example slice_coding_mode, which
+ * write_slice_header() reads) would be copied back with undefined values.
+ */
+void init_slice(inout SliceContext sc, const uint slice_idx)
+{
+    /* Set coordinates */
+    uvec2 img_size = imageSize(src[0]);
+    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
+                           gl_NumWorkGroups.x, chroma_shift.x);
+    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
+                           gl_NumWorkGroups.x, chroma_shift.x);
+    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
+                           gl_NumWorkGroups.y, chroma_shift.y);
+    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
+                           gl_NumWorkGroups.y, chroma_shift.y);
+
+    sc.slice_pos = ivec2(sxs, sys);
+    sc.slice_dim = ivec2(sxe - sxs, sye - sys);
+    sc.slice_rct_coef = ivec2(1, 1);
+
+    /* Every slice gets slice_size_max bytes of output, back to back */
+    rac_init(sc.c,
+             OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
+             slice_size_max);
+}
+
+/*
+ * Same as an adaptive bit write followed by renormalization, but using
+ * renorm_encoder_full(), which copes with a coder that has no pending byte.
+ */
+void put_rac_full(inout RangeCoder c, uint64_t state, bool bit)
+{
+    put_rac_norenorm(c, state, bit);
+
+    const bool needs_renorm = c.range < 0x100;
+    if (needs_renorm)
+        renorm_encoder_full(c);
+}
+
+/*
+ * Range-code an unsigned integer with put_rac_full(): zero flag at
+ * `state`, unary exponent at state+1..+10, mantissa bits at state+22..+31.
+ * No sign bit is written.
+ */
+void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v)
+{
+    if (v == 0) {
+        put_rac_full(c, state, true);
+        return;
+    }
+    put_rac_full(c, state, false);
+
+    const int msb = findMSB(v);
+
+    /* Exponent: msb set bits followed by a single clear bit */
+    const uint64_t exp_state = state + 1;
+    for (int n = 0; n < msb; n++)
+        put_rac_full(c, exp_state + min(n, 9), true);
+    put_rac_full(c, exp_state + min(msb, 9), false);
+
+    /* Mantissa: the bits below the leading one, from high to low */
+    const uint64_t man_state = state + 22;
+    for (int b = msb - 1; b >= 0; b--)
+        put_rac_full(c, man_state + min(b, 9), bitfieldExtract(v, b, 1) != 0u);
+}
+
+/*
+ * Write the slice header with the range coder, using the CONTEXT_SIZE
+ * bytes at `state` as adaptive state after resetting them to 128.
+ */
+void write_slice_header(inout SliceContext sc, uint64_t state)
+{
+    u8buf hdr_state = u8buf(state);
+
+    [[unroll]]
+    for (int i = 0; i < CONTEXT_SIZE; i++)
+        hdr_state[i].v = uint8_t(128);
+
+    /* Slice position, given as the workgroup indices */
+    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x);
+    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y);
+
+    /* Two zero symbols; presumably the slice extent fields - confirm against the FFV1 slice header layout */
+    put_symbol_unsigned(sc.c, state, 0);
+    put_symbol_unsigned(sc.c, state, 0);
+
+    /* The same context model index for every plane */
+    for (int plane = 0; plane < codec_planes; plane++)
+        put_symbol_unsigned(sc.c, state, context_model);
+
+    put_symbol_unsigned(sc.c, state, pic_mode);
+    put_symbol_unsigned(sc.c, state, sar.x);
+    put_symbol_unsigned(sc.c, state, sar.y);
+
+    if (version < 4)
+        return;
+
+    const bool raw_slice = (sc.slice_coding_mode == 1);
+    put_rac_full(sc.c, state, raw_slice);
+    put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
+
+    /* Coefficients are only sent for transformed slices of colorspace 1, .y first */
+    if (!raw_slice && colorspace == 1) {
+        put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
+        put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x);
+    }
+}
+
+/*
+ * Write the key-frame flag as a single adaptive bit, using the byte at
+ * `state` (reset to 128 first) as its probability state.
+ */
+void write_frame_header(inout SliceContext sc, uint64_t state)
+{
+    u8buf flag_state = u8buf(state);
+    flag_state.v = uint8_t(128);
+
+    const bool is_key = bool(key_frame);
+    put_rac_full(sc.c, state, is_key);
+}
+
+#ifdef GOLOMB
+/*
+ * Close the range-coded header and point the bit writer at the space that
+ * follows it inside the slice's output region. The header length is kept
+ * in sc.hdr_len.
+ */
+void init_golomb(inout SliceContext sc)
+{
+    sc.hdr_len = rac_terminate(sc.c);
+
+    u8buf payload = OFFBUF(u8buf, sc.c.bytestream_start, sc.hdr_len);
+    init_put_bits(sc.pb, payload, slice_size_max - sc.hdr_len);
+}
+#endif
+
+/*
+ * Per-slice setup: initialize the slice context, then write the frame
+ * header (slice 0 only) and the slice header. In Golomb builds the header
+ * is closed and the bit writer is prepared afterwards.
+ */
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+
+    /* Each slice owns CONTEXT_SIZE bytes of scratch for the header coder state */
+    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;
+
+    init_slice(slice_ctx[slice_idx], slice_idx);
+
+    if (slice_idx == 0)
+        write_frame_header(slice_ctx[slice_idx], scratch_state);
+
+    write_slice_header(slice_ctx[slice_idx], scratch_state);
+
+#ifdef GOLOMB
+    init_golomb(slice_ctx[slice_idx]);
+#endif
+}
diff --git a/libavcodec/vulkan/ffv1_enc_vlc.comp b/libavcodec/vulkan/ffv1_enc_vlc.comp
new file mode 100644
index 0000000000..7a4d39e307
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_vlc.comp
@@ -0,0 +1,112 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Run-length bookkeeping updated by calc_new_state(). */
+struct RLEState {
+ int count; /* current run length */
+ int diff; /* residual inspected by calc_new_state() */
+ int index; /* index into log2_run */
+ bool mode; /* true while run mode is active */
+};
+
+/*
+ * Advance the run-length state by one sample without emitting any bits.
+ *
+ * While run mode is active, a zero residual extends the run. Any non-zero
+ * residual ends it: the run counter is consumed in log2_run steps, the
+ * index steps back by one, and a positive residual is reduced by one, as
+ * encode_line() does.
+ *
+ * NOTE(review): encode_line() enters run mode when the context is 0,
+ * whereas this function clears the mode in that case - confirm the intent.
+ */
+void calc_new_state(inout RLEState state, int context)
+{
+    if (context == 0)
+        state.mode = false;
+
+    if (!state.mode)
+        return;
+
+    if (state.diff != 0) {
+        while (state.count >= (1 << log2_run[state.index])) {
+            state.count -= 1 << log2_run[state.index];
+            state.index++;
+        }
+        if (state.index > 0)
+            state.index--;
+        state.count = 0;
+        state.mode = false;
+        if (state.diff > 0)
+            state.diff--;
+    } else {
+        state.count++;
+    }
+}
+
+/*
+ * Encode one row of a slice with the bit writer in sc.pb.
+ *
+ * For every sample, get_diff() yields a (context, residual) pair. Context 0
+ * switches to run mode, in which zero residuals are only counted. The first
+ * non-zero residual flushes the run and is then coded like any other
+ * sample, through get_vlc_symbol() with the state selected by the context.
+ *
+ * run_index is inout: it is updated here and handed back to the caller.
+ */
+void encode_line(inout SliceContext sc, uint64_t state,
+ int y, int p, int comp, int bits, inout int run_index)
+{
+ ivec2 sp = sc.slice_pos;
+
+ int w = sc.slice_dim.x;
+ /* Planes 1 and 2 are addressed with the chroma shift applied */
+ if (p > 0 && p < 3) {
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ int run_count = 0;
+ bool run_mode = false;
+
+ for (int x = 0; x < w; x++) {
+ ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
+
+ if (d[0] == 0)
+ run_mode = true;
+
+ if (run_mode) {
+ if (d[1] != 0) {
+ /* A very unlikely loop */
+ while (run_count >= 1 << log2_run[run_index]) {
+ run_count -= 1 << log2_run[run_index];
+ run_index++;
+ put_bits(sc.pb, 1, 1);
+ }
+
+ /* Remaining run length, written in 1 + log2_run[run_index] bits */
+ put_bits(sc.pb, 1 + log2_run[run_index], run_count);
+ if (run_index != 0)
+ run_index--;
+ run_count = 0;
+ run_mode = false;
+ /* The residual that ends a run is non-zero, so positive values are shifted down by one */
+ if (d[1] > 0)
+ d[1]--;
+ } else {
+ run_count++;
+ }
+ }
+
+ /* Also reached for the sample that has just ended a run */
+ if (!run_mode) {
+ VlcState sb = VlcState(state + VLC_STATE_SIZE*d[0]);
+ Symbol sym = get_vlc_symbol(sb, d[1], bits);
+ put_bits(sc.pb, sym.bits, sym.val);
+ }
+ }
+
+ /* A run still open at the end of the row: one set bit per full step, plus one for any remainder */
+ if (run_mode) {
+ while (run_count >= (1 << log2_run[run_index])) {
+ run_count -= 1 << log2_run[run_index];
+ run_index++;
+ put_bits(sc.pb, 1, 1);
+ }
+
+ if (run_count > 0)
+ put_bits(sc.pb, 1, 1);
+ }
+}
diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_reset.comp
new file mode 100644
index 0000000000..c7c7962850
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_reset.comp
@@ -0,0 +1,55 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Reset the adaptive state of one plane (gl_WorkGroupID.z) of one slice.
+ * Nothing is done for a slice in coding mode 0 on a non-key frame.
+ *
+ * The invocations of a workgroup share the work: invocation x handles
+ * elements x, x + gl_WorkGroupSize.x, ... of the plane's state.
+ */
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+
+    if (slice_ctx[slice_idx].slice_coding_mode == 0 && key_frame == 0)
+        return;
+
+    uint64_t slice_state_off = uint64_t(slice_state) +
+                               slice_idx*plane_state_size*codec_planes;
+
+#ifdef GOLOMB
+    /* One VlcState per context */
+    uint64_t start = slice_state_off +
+                     (gl_WorkGroupID.z*context_count +
+                      gl_LocalInvocationID.x)*VLC_STATE_SIZE;
+    for (uint x = gl_LocalInvocationID.x; x < context_count; x += gl_WorkGroupSize.x) {
+        VlcState sb = VlcState(start);
+        sb.drift     =  int16_t(0);
+        sb.error_sum = uint32_t(4);
+        sb.bias      =   int8_t(0);
+        sb.count     =  uint8_t(1);
+        start += gl_WorkGroupSize.x*VLC_STATE_SIZE;
+    }
+#else
+    /* Range coder state is filled one dword at a time; each dword sets four state bytes to 128 */
+    uint64_t start = slice_state_off +
+                     (gl_WorkGroupID.z*context_count)*CONTEXT_SIZE +
+                     (gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */
+    uint count_total = context_count*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
+    for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {
+        u32buf(start).v = 0x80808080;
+        /* Advance by one dword per invocation in the workgroup, independent of CONTEXT_SIZE */
+        start += gl_WorkGroupSize.x << 2 /* dwords to bytes */;
+    }
+#endif
+}
diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp
new file mode 100644
index 0000000000..0a53e035b5
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_vlc.comp
@@ -0,0 +1,122 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Size in bytes of one VlcState: 4 + 2 + 1 + 1 */
+#define VLC_STATE_SIZE 8
+/* Adaptive state of one Golomb-Rice context, accessed through a buffer reference. */
+layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer VlcState {
+ uint32_t error_sum; /* accumulated abs(v), halved together with count */
+ int16_t drift; /* accumulated v, kept in (-count, 0] by update_vlc_state() */
+ int8_t bias; /* correction subtracted from the residual in get_vlc_symbol() */
+ uint8_t count; /* number of updates, halved when it reaches 128 */
+};
+
+/*
+ * Fold the residual `v` into the adaptive state: accumulate its magnitude
+ * and sign, halve the statistics once count reaches 128, and adjust bias
+ * by one step whenever drift leaves the range (-count, 0].
+ */
+void update_vlc_state(inout VlcState state, const int v)
+{
+    int drift = state.drift;
+    int count = state.count;
+    int bias = state.bias;
+
+    /* Accumulate in the field's own 32-bit type so large magnitudes are not truncated */
+    uint32_t error_sum = state.error_sum + uint32_t(abs(v));
+    drift += v;
+
+    if (count == 128) { // FIXME: variable
+        count >>= 1;
+        drift >>= 1;
+        error_sum >>= 1;
+    }
+    count++;
+
+    if (drift <= -count) {
+        bias = max(bias - 1, -128);
+        drift = max(drift + count, -count + 1);
+    } else if (drift > 0) {
+        bias = min(bias + 1, 127);
+        drift = min(drift - count, 0);
+    }
+
+    state.error_sum = error_sum;
+    state.bias = int8_t(bias);
+    state.drift = int16_t(drift);
+    state.count = uint8_t(count);
+}
+
+/* A code word ready to be handed to put_bits(). */
+struct Symbol {
+ uint32_t bits; /* number of bits to write */
+ uint32_t val; /* value to write */
+};
+
+/*
+ * Build the unsigned Golomb-Rice code word for `i` with parameter `k`.
+ *
+ * When the quotient i >> k is below `limit`, the word is the quotient in
+ * unary followed by the k low bits of i. Otherwise an escape word of
+ * limit + esc_len bits carrying i - limit + 1 is produced.
+ * `i` must not be negative.
+ */
+Symbol set_ur_golomb(int i, int k, int limit, int esc_len)
+{
+    Symbol sym;
+
+#ifdef DEBUG
+    if (i < 0)
+        debugPrintfEXT("Error: i is negative!");
+#endif
+
+    const int e = i >> k;
+    if (e < limit) {
+        sym.bits = e + k + 1;
+        sym.val = (1 << k) + zero_extend(i, k);
+    } else {
+        sym.bits = limit + esc_len;
+        sym.val = i - limit + 1;
+    }
+
+    return sym;
+}
+
+/**
+ * Build the signed Golomb-Rice code word (ffv1) for `i`.
+ *
+ * The signed value is first mapped to a non-negative one (0, -1, 1, -2, ...
+ * become 0, 1, 2, 3, ...) and then coded with set_ur_golomb().
+ */
+Symbol set_sr_golomb(int i, int k, int limit, int esc_len)
+{
+    const int doubled = -2 * i - 1;
+    const int mapped = doubled ^ (doubled >> 31);
+
+    return set_ur_golomb(mapped, k, limit, esc_len);
+}
+
+/*
+ * Produce the code word for residual `v` under the adaptive state `state`
+ * and update that state.
+ *
+ * The residual is bias-corrected and folded to `bits` bits. The Rice
+ * parameter k is the smallest value for which count << k reaches
+ * error_sum. The sign of the coded value is flipped when
+ * 2*drift + count is negative. The state is updated with the folded
+ * residual after the code has been derived from the old state.
+ */
+Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
+{
+    v = fold(v - int(state.bias), bits);
+
+    /* Read once; the loop below does not modify the state */
+    const uint32_t error_sum = state.error_sum;
+
+    int i = state.count;
+    int k = 0;
+    while (i < error_sum) { // FIXME: optimize
+        k++;
+        i += i;
+    }
+
+#ifdef DEBUG
+    if (k > 16)
+        debugPrintfEXT("Error: k > 16!");
+#endif
+
+    const int code = v ^ ((2 * state.drift + state.count) >> 31);
+
+    update_vlc_state(state, v);
+
+    return set_sr_golomb(code, k, 12, bits);
+}
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
new file mode 100644
index 0000000000..13c135f913
--- /dev/null
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -0,0 +1,190 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* State of one range encoder instance. */
+struct RangeCoder {
+ u8buf bytestream_start; /* first byte of the output, set by rac_init() */
+ u8buf bytestream; /* next byte to be written */
+
+ uint low; /* low end of the current interval */
+ uint16_t range; /* width of the current interval */
+ uint8_t outstanding_count; /* bytes held back after outstanding_byte until a carry is resolved */
+ uint8_t outstanding_byte; /* byte held back for a possible carry; 0xFF after rac_init() */
+};
+
+/*
+ * Full renorm version that can handle outstanding_byte == 0xFF.
+ *
+ * Shifts one byte out of `low`. With outstanding_byte == 0xFF nothing is
+ * written and the byte is only latched. Otherwise the held-back bytes are
+ * flushed once the carry is known (low <= 0xFF00: no carry, fill 0xFF;
+ * low >= 0x10000: carry, fill 0x00), or one more byte is held back while it
+ * is still undecided.
+ */
+void renorm_encoder_full(inout RangeCoder c)
+{
+    int emitted = 0;
+
+    const bool no_pending = (c.outstanding_byte == 0xFF);
+    const bool no_carry = (c.low <= 0xFF00);
+    const bool carry = (c.low >= 0x10000);
+
+    if (no_pending) {
+        c.outstanding_byte = uint8_t(c.low >> 8);
+    } else if (no_carry) {
+        c.bytestream[emitted++].v = c.outstanding_byte;
+        for (uint8_t left = c.outstanding_count; left > 0; left--)
+            c.bytestream[emitted++].v = uint8_t(0xFF);
+        c.outstanding_count = uint8_t(0);
+        c.outstanding_byte = uint8_t(c.low >> 8);
+    } else if (carry) {
+        c.bytestream[emitted++].v = c.outstanding_byte + uint8_t(1);
+        for (uint8_t left = c.outstanding_count; left > 0; left--)
+            c.bytestream[emitted++].v = uint8_t(0x00);
+        c.outstanding_count = uint8_t(0);
+        c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8));
+    } else {
+        c.outstanding_count++;
+    }
+
+    c.bytestream = OFFBUF(u8buf, c.bytestream, emitted);
+
+    /* Keep only the low byte of `low`, moved up by 8 bits, and widen the range */
+    c.low = bitfieldInsert(0, c.low, 8, 8);
+    c.range <<= 8;
+}
+
+/*
+ * Fast renormalization: shifts one byte out of `low` and scales `range` by 256.
+ *
+ * Cannot deal with outstanding_byte == -1 (stored as 0xFF, the value set by
+ * rac_init()) in the name of speed: once the carry is decided, the held-back
+ * byte is always written. Use renorm_encoder_full() while that can occur.
+ *
+ * NOTE(review): outstanding_count is 8 bits wide, so `oc` wraps after 255
+ * held-back bytes - confirm this cannot happen in practice.
+ */
+void renorm_encoder(inout RangeCoder c)
+{
+ /* Number of bytes to emit if the carry is decided: the held-back byte plus the run after it */
+ uint8_t oc = c.outstanding_count + uint8_t(1);
+ uint low = c.low;
+
+ c.range <<= 8;
+ /* Keep only the low byte of `low`, moved up by 8 bits */
+ c.low = bitfieldInsert(0, low, 8, 8);
+
+ /* Carry still undecided: hold back one more byte and write nothing */
+ if (low > 0xFF00 && low < 0x10000) {
+ c.outstanding_count = oc;
+ return;
+ }
+
+ u8buf bs = c.bytestream;
+ uint8_t outstanding_byte = c.outstanding_byte;
+
+ c.bytestream = OFFBUF(u8buf, bs, oc);
+ c.outstanding_count = uint8_t(0);
+ c.outstanding_byte = uint8_t(low >> 8);
+
+ /* Past the early return, low > 0xFF00 means low >= 0x10000, i.e. a carry */
+ uint8_t obs = uint8_t(low > 0xFF00);
+ /* 0x00 after a carry, 0xFF otherwise */
+ uint8_t fill = obs - uint8_t(1); /* unsigned underflow */
+
+ bs[0].v = outstanding_byte + obs;
+ for (int i = 1; i < oc; i++)
+ bs[i].v = fill;
+}
+
+/*
+ * Encode one bit with the adaptive probability stored in the byte at
+ * `state`, and replace that byte with its successor from zero_one_state.
+ * The interval is not renormalized here; callers do that afterwards.
+ */
+void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
+{
+    u8buf prob = u8buf(state);
+    uint val = uint(prob.v);
+    uint16_t range1 = uint16_t((uint(c.range) * val) >> 8);
+
+#ifdef DEBUG
+    if (val == 0)
+        debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(prob));
+    if (range1 >= c.range)
+        debugPrintfEXT("Error: range1 >= c.range");
+    if (range1 <= 0)
+        debugPrintfEXT("Error: range1 <= 0");
+#endif
+
+    /* A set bit takes the upper range1 part of the interval, a clear bit the rest */
+    const uint16_t rest = c.range - range1;
+    if (!bit) {
+        c.range = rest;
+    } else {
+        c.low += rest;
+        c.range = range1;
+    }
+
+    /* Table rows are 256 entries apart: row 0 for a clear bit, row 1 for a set bit */
+    prob.v = zero_one_state[(uint(bit) << 8) + val];
+
+#ifdef DEBUG
+    if (prob.v == 0)
+        debugPrintfEXT("Error: inserted zero state from tab %i idx %i", bit, val);
+#endif
+}
+
+/*
+ * Equiprobable bit: the interval is split in half, no adaptive state is
+ * involved. Renormalizes when the range drops below 0x100.
+ */
+void put_rac_equi(inout RangeCoder c, bool bit)
+{
+    const uint16_t range1 = c.range >> 1;
+
+#ifdef DEBUG
+    if (range1 >= c.range)
+        debugPrintfEXT("Error: range1 >= c.range");
+    if (range1 <= 0)
+        debugPrintfEXT("Error: range1 <= 0");
+#endif
+
+    const uint16_t rest = c.range - range1;
+    if (!bit) {
+        c.range = rest;
+    } else {
+        c.low += rest;
+        c.range = range1;
+    }
+
+    const bool needs_renorm = c.range < 0x100;
+    if (needs_renorm)
+        renorm_encoder(c);
+}
+
+/*
+ * Encode a clear bit with a fixed probability state of 129, without
+ * touching any adaptive state, and renormalize if needed.
+ */
+void put_rac_terminate(inout RangeCoder c)
+{
+    const uint scaled = uint(c.range) * 129;
+    uint16_t range1 = uint16_t(scaled >> 8);
+
+#ifdef DEBUG
+    if (range1 >= c.range)
+        debugPrintfEXT("Error: range1 >= c.range");
+    if (range1 <= 0)
+        debugPrintfEXT("Error: range1 <= 0");
+#endif
+
+    c.range -= range1;
+
+    const bool needs_renorm = c.range < 0x100;
+    if (needs_renorm)
+        renorm_encoder(c);
+}
+
+/*
+ * Finish the range-coded stream: write the terminating bit, then force two
+ * renormalizations so that everything still held in the coder is emitted.
+ * Return the number of bytes written.
+ */
+uint32_t rac_terminate(inout RangeCoder c)
+{
+    put_rac_terminate(c);
+
+    c.low += 0xFF;
+    for (int pass = 0; pass < 2; pass++) {
+        c.range = uint16_t(0xFF);
+        renorm_encoder(c);
+    }
+
+#ifdef DEBUG
+    if (c.low != 0)
+        debugPrintfEXT("Error: c.low != 0");
+    if (c.range < 0x100)
+        debugPrintfEXT("Error: range < 0x100");
+#endif
+
+    const uint64_t first = uint64_t(c.bytestream_start);
+    const uint64_t next = uint64_t(c.bytestream);
+    return uint32_t(next - first);
+}
+
+/*
+ * Reset a range coder so that it writes to `data`. The interval starts at
+ * low = 0, range = 0xFF00, with no bytes held back; outstanding_byte starts
+ * at 0xFF. `buf_size` is accepted but not used.
+ */
+void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size)
+{
+    /* Output window */
+    r.bytestream       = data;
+    r.bytestream_start = data;
+
+    /* Coding interval */
+    r.range = uint16_t(0xFF00);
+    r.low   = 0;
+
+    /* Carry handling */
+    r.outstanding_byte  = uint8_t(0xFF);
+    r.outstanding_count = uint8_t(0);
+}