diff options
author | Roman Arzumanyan <rarzumanyan@nvidia.com> | 2022-09-10 11:05:56 +0300 |
---|---|---|
committer | Timo Rothenpieler <timo@rothenpieler.org> | 2022-09-13 22:59:30 +0200 |
commit | cc81ab283c72921a23f7dc149c6a2b386eaf77c6 (patch) | |
tree | 31cd9da00d55a59d453a8d327ce6d0bfbf325fb2 | |
parent | c177108ae1144fd4e6cedb4a702260dbaa179825 (diff) | |
download | ffmpeg-cc81ab283c72921a23f7dc149c6a2b386eaf77c6.tar.gz |
libavfilter: add vf_colorrange_cuda, CUDA-accelerated color conversion filter
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | doc/filters.texi | 32 | ||||
-rw-r--r-- | libavfilter/Makefile | 3 | ||||
-rw-r--r-- | libavfilter/allfilters.c | 1 | ||||
-rw-r--r-- | libavfilter/version.h | 2 | ||||
-rw-r--r-- | libavfilter/vf_colorspace_cuda.c | 435 | ||||
-rw-r--r-- | libavfilter/vf_colorspace_cuda.cu | 94 |
7 files changed, 570 insertions, 3 deletions
@@ -3149,10 +3149,12 @@ qsvvpp_select="qsv" vaapi_encode_deps="vaapi" v4l2_m2m_deps="linux_videodev2_h sem_timedwait" -chromakey_cuda_filter_deps="ffnvcodec" -chromakey_cuda_filter_deps_any="cuda_nvcc cuda_llvm" bilateral_cuda_filter_deps="ffnvcodec" bilateral_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +chromakey_cuda_filter_deps="ffnvcodec" +chromakey_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +colorspace_cuda_filter_deps="ffnvcodec" +colorspace_cuda_filter_deps_any="cuda_nvcc cuda_llvm" hwupload_cuda_filter_deps="ffnvcodec" scale_npp_filter_deps="ffnvcodec libnpp" scale2ref_npp_filter_deps="ffnvcodec libnpp" diff --git a/doc/filters.texi b/doc/filters.texi index dbc08163d8..6aa350a63c 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -9725,6 +9725,38 @@ For example to convert the input to SMPTE-240M, use the command: colorspace=smpte240m @end example +@section colorspace_cuda + +CUDA accelerated implementation of the colorspace filter. + +It is by no means feature complete compared to the software colorspace filter, +and at the current time only supports color range conversion between jpeg/full +and mpeg/limited range. + +The filter accepts the following options: + +@table @option +@item range +Specify output color range. + +The accepted values are: +@table @samp +@item tv +TV (restricted) range + +@item mpeg +MPEG (restricted) range + +@item pc +PC (full) range + +@item jpeg +JPEG (full) range + +@end table + +@end table + @section colortemperature Adjust color temperature in video to simulate variations in ambient color temperature. diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 841ec47141..ff2a06c262 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -230,6 +230,9 @@ OBJS-$(CONFIG_COLORLEVELS_FILTER) += vf_colorlevels.o OBJS-$(CONFIG_COLORMAP_FILTER) += vf_colormap.o OBJS-$(CONFIG_COLORMATRIX_FILTER) += vf_colormatrix.o OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspacedsp.o +OBJS-$(CONFIG_COLORSPACE_CUDA_FILTER) += vf_colorspace_cuda.o \ + vf_colorspace_cuda.ptx.o \ + cuda/load_helper.o OBJS-$(CONFIG_COLORTEMPERATURE_FILTER) += vf_colortemperature.o OBJS-$(CONFIG_CONVOLUTION_FILTER) += vf_convolution.o OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 79e8a16bbc..119de40b25 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -213,6 +213,7 @@ extern const AVFilter ff_vf_colorlevels; extern const AVFilter ff_vf_colormap; extern const AVFilter ff_vf_colormatrix; extern const AVFilter ff_vf_colorspace; +extern const AVFilter ff_vf_colorspace_cuda; extern const AVFilter ff_vf_colortemperature; extern const AVFilter ff_vf_convolution; extern const AVFilter ff_vf_convolution_opencl; diff --git a/libavfilter/version.h b/libavfilter/version.h index fc0df70dee..5aac9c513a 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -31,7 +31,7 @@ #include "version_major.h" -#define LIBAVFILTER_VERSION_MINOR 48 +#define LIBAVFILTER_VERSION_MINOR 49 #define LIBAVFILTER_VERSION_MICRO 100 diff --git a/libavfilter/vf_colorspace_cuda.c b/libavfilter/vf_colorspace_cuda.c new file mode 100644 index 0000000000..131c4ad72b --- /dev/null +++ b/libavfilter/vf_colorspace_cuda.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <string.h> + +#include "libavutil/avstring.h" +#include "libavutil/common.h" +#include "libavutil/cuda_check.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/internal.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "scale_eval.h" +#include "video.h" + +#include "cuda/load_helper.h" + +static const enum AVPixelFormat supported_formats[] = { + AV_PIX_FMT_NV12, + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_YUV444P, +}; + +#define DIV_UP(a, b) (((a) + (b)-1) / (b)) +#define BLOCKX 32 +#define BLOCKY 16 + +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) + +typedef struct CUDAColorspaceContext { + const AVClass* class; + + AVCUDADeviceContext* hwctx; + AVBufferRef* frames_ctx; + AVFrame* own_frame; + AVFrame* tmp_frame; + + CUcontext cu_ctx; + CUstream cu_stream; + CUmodule cu_module; + CUfunction cu_convert[AVCOL_RANGE_NB]; + + enum AVPixelFormat pix_fmt; + enum AVColorRange range; + + int num_planes; +} CUDAColorspaceContext; + +static av_cold int cudacolorspace_init(AVFilterContext* ctx) +{ + CUDAColorspaceContext* s = ctx->priv; + + s->own_frame = av_frame_alloc(); + if (!s->own_frame) + return AVERROR(ENOMEM); + + s->tmp_frame = av_frame_alloc(); + if (!s->tmp_frame) + return AVERROR(ENOMEM); + + return 0; +} + +static av_cold void cudacolorspace_uninit(AVFilterContext* ctx) +{ + CUDAColorspaceContext* s = ctx->priv; + + if (s->hwctx && s->cu_module) { + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + CUcontext dummy; + + CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); + CHECK_CU(cu->cuModuleUnload(s->cu_module)); + s->cu_module = NULL; + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + } + + av_frame_free(&s->own_frame); + av_buffer_unref(&s->frames_ctx); + av_frame_free(&s->tmp_frame); +} + +static av_cold int init_hwframe_ctx(CUDAColorspaceContext* s, AVBufferRef* device_ctx, + int width, int height) +{ + AVBufferRef* out_ref = NULL; + AVHWFramesContext* out_ctx; + int ret; + + out_ref = av_hwframe_ctx_alloc(device_ctx); + if (!out_ref) + return AVERROR(ENOMEM); + + out_ctx = (AVHWFramesContext*)out_ref->data; + + out_ctx->format = AV_PIX_FMT_CUDA; + out_ctx->sw_format = s->pix_fmt; + out_ctx->width = FFALIGN(width, 32); + out_ctx->height = FFALIGN(height, 32); + + ret = av_hwframe_ctx_init(out_ref); + if (ret < 0) + goto fail; + + av_frame_unref(s->own_frame); + ret = av_hwframe_get_buffer(out_ref, s->own_frame, 0); + if (ret < 0) + goto fail; + + s->own_frame->width = width; + s->own_frame->height = height; + + av_buffer_unref(&s->frames_ctx); + s->frames_ctx = out_ref; + + return 0; +fail: + av_buffer_unref(&out_ref); + return ret; +} + +static int format_is_supported(enum AVPixelFormat fmt) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) + if (fmt == supported_formats[i]) + return 1; + + return 0; +} + +static av_cold int init_processing_chain(AVFilterContext* ctx, int width, + int height) +{ + CUDAColorspaceContext* s = ctx->priv; + AVHWFramesContext* in_frames_ctx; + + int ret; + + if (!ctx->inputs[0]->hw_frames_ctx) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + + in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; + s->pix_fmt = in_frames_ctx->sw_format; + + if (!format_is_supported(s->pix_fmt)) { + av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", + av_get_pix_fmt_name(s->pix_fmt)); + return AVERROR(EINVAL); + } + + if ((AVCOL_RANGE_MPEG != s->range) && (AVCOL_RANGE_JPEG != s->range)) { + av_log(ctx, AV_LOG_ERROR, "Unsupported color range\n"); + return AVERROR(EINVAL); + } + + s->num_planes = av_pix_fmt_count_planes(s->pix_fmt); + + ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, width, height); + if (ret < 0) + return ret; + + ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); + if (!ctx->outputs[0]->hw_frames_ctx) + return AVERROR(ENOMEM); + + return 0; +} + +static av_cold int cudacolorspace_load_functions(AVFilterContext* ctx) +{ + CUDAColorspaceContext* s = ctx->priv; + CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + int ret; + + extern const unsigned char ff_vf_colorspace_cuda_ptx_data[]; + extern const unsigned int ff_vf_colorspace_cuda_ptx_len; + + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) + return ret; + + ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, + ff_vf_colorspace_cuda_ptx_data, + ff_vf_colorspace_cuda_ptx_len); + if (ret < 0) + goto fail; + + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_convert[AVCOL_RANGE_MPEG], s->cu_module, "to_mpeg_cuda")); + if (ret < 0) + goto fail; + + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_convert[AVCOL_RANGE_JPEG], s->cu_module, "to_jpeg_cuda")); + if (ret < 0) + goto fail; + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return ret; +} + +static av_cold int cudacolorspace_config_props(AVFilterLink* outlink) +{ + AVFilterContext* ctx = outlink->src; + AVFilterLink* inlink = outlink->src->inputs[0]; + CUDAColorspaceContext* s = ctx->priv; + AVHWFramesContext* frames_ctx = + (AVHWFramesContext*)inlink->hw_frames_ctx->data; + AVCUDADeviceContext* device_hwctx = frames_ctx->device_ctx->hwctx; + int ret; + + s->hwctx = device_hwctx; + s->cu_stream = s->hwctx->stream; + + outlink->w = inlink->w; + outlink->h = inlink->h; + + ret = init_processing_chain(ctx, inlink->w, inlink->h); + if (ret < 0) + return ret; + + if (inlink->sample_aspect_ratio.num) { + outlink->sample_aspect_ratio = av_mul_q( + (AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, + inlink->sample_aspect_ratio); + } else { + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + } + + ret = cudacolorspace_load_functions(ctx); + if (ret < 0) + return ret; + + return ret; +} + +static int conv_cuda_convert(AVFilterContext* ctx, AVFrame* out, AVFrame* in) +{ + CUDAColorspaceContext* s = ctx->priv; + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; + int ret; + + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) + return ret; + + out->color_range = s->range; + + for (int i = 0; i < s->num_planes; i++) { + int width = in->width, height = in->height, comp_id = (i > 0); + + switch (s->pix_fmt) { + case AV_PIX_FMT_YUV444P: + break; + case AV_PIX_FMT_YUV420P: + width = comp_id ? in->width / 2 : in->width; + case AV_PIX_FMT_NV12: + height = comp_id ? in->height / 2 : in->height; + break; + default: + av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", + av_get_pix_fmt_name(s->pix_fmt)); + return AVERROR(EINVAL); + } + + if (!s->cu_convert[out->color_range]) { + av_log(ctx, AV_LOG_ERROR, "Unsupported color range\n"); + return AVERROR(EINVAL); + } + + if (in->color_range != out->color_range) { + void* args[] = {&in->data[i], &out->data[i], &in->linesize[i], + &comp_id}; + ret = CHECK_CU(cu->cuLaunchKernel( + s->cu_convert[out->color_range], DIV_UP(width, BLOCKX), + DIV_UP(height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, s->cu_stream, + args, NULL)); + } else { + ret = av_hwframe_transfer_data(out, in, 0); + if (ret < 0) + return ret; + } + } + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return ret; +} + +static int cudacolorspace_conv(AVFilterContext* ctx, AVFrame* out, AVFrame* in) +{ + CUDAColorspaceContext* s = ctx->priv; + AVFilterLink* outlink = ctx->outputs[0]; + AVFrame* src = in; + int ret; + + ret = conv_cuda_convert(ctx, s->own_frame, src); + if (ret < 0) + return ret; + + src = s->own_frame; + ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); + if (ret < 0) + return ret; + + av_frame_move_ref(out, s->own_frame); + av_frame_move_ref(s->own_frame, s->tmp_frame); + + s->own_frame->width = outlink->w; + s->own_frame->height = outlink->h; + + ret = av_frame_copy_props(out, in); + if (ret < 0) + return ret; + + return 0; +} + +static int cudacolorspace_filter_frame(AVFilterLink* link, AVFrame* in) +{ + AVFilterContext* ctx = link->dst; + CUDAColorspaceContext* s = ctx->priv; + AVFilterLink* outlink = ctx->outputs[0]; + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + + AVFrame* out = NULL; + CUcontext dummy; + int ret = 0; + + out = av_frame_alloc(); + if (!out) { + ret = AVERROR(ENOMEM); + goto fail; + } + + ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); + if (ret < 0) + goto fail; + + ret = cudacolorspace_conv(ctx, out, in); + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (ret < 0) + goto fail; + + av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, + (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, + (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, + INT_MAX); + + av_frame_free(&in); + return ff_filter_frame(outlink, out); +fail: + av_frame_free(&in); + av_frame_free(&out); + return ret; +} + +#define OFFSET(x) offsetof(CUDAColorspaceContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption options[] = { + {"range", "Output video range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_NB - 1, FLAGS, "range"}, + {"tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range"}, + {"mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range"}, + {"pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range"}, + {"jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range"}, + {NULL}, +}; + +static const AVClass cudacolorspace_class = { + .class_name = "colorspace_cuda", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +static const AVFilterPad cudacolorspace_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = cudacolorspace_filter_frame, + }, +}; + +static const AVFilterPad cudacolorspace_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = cudacolorspace_config_props, + }, +}; + +const AVFilter ff_vf_colorspace_cuda = { + .name = "colorspace_cuda", + .description = NULL_IF_CONFIG_SMALL("CUDA accelerated video color converter"), + + .init = cudacolorspace_init, + .uninit = cudacolorspace_uninit, + + .priv_size = sizeof(CUDAColorspaceContext), + .priv_class = &cudacolorspace_class, + + FILTER_INPUTS(cudacolorspace_inputs), + FILTER_OUTPUTS(cudacolorspace_outputs), + + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA), + + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_colorspace_cuda.cu b/libavfilter/vf_colorspace_cuda.cu new file mode 100644 index 0000000000..59db9f69f3 --- /dev/null +++ b/libavfilter/vf_colorspace_cuda.cu @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +extern "C" { +#define MPEG_LUMA_MIN (16) +#define MPEG_CHROMA_MIN (16) +#define MPEG_LUMA_MAX (235) +#define MPEG_CHROMA_MAX (240) + +#define JPEG_LUMA_MIN (0) +#define JPEG_CHROMA_MIN (1) +#define JPEG_LUMA_MAX (255) +#define JPEG_CHROMA_MAX (255) + +__device__ int mpeg_min[] = {MPEG_LUMA_MIN, MPEG_CHROMA_MIN}; +__device__ int mpeg_max[] = {MPEG_LUMA_MAX, MPEG_CHROMA_MAX}; + +__device__ int jpeg_min[] = {JPEG_LUMA_MIN, JPEG_CHROMA_MIN}; +__device__ int jpeg_max[] = {JPEG_LUMA_MAX, JPEG_CHROMA_MAX}; + +__device__ int clamp(int val, int min, int max) +{ + if (val < min) + return min; + else if (val > max) + return max; + else + return val; +} + +__global__ void to_jpeg_cuda(const unsigned char* src, unsigned char* dst, + int pitch, int comp_id) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int src_, dst_; + + // 8 bit -> 15 bit for better precision + src_ = static_cast<int>(src[x + y * pitch]) << 7; + + // Conversion + dst_ = comp_id ? (min(src_, 30775) * 4663 - 9289992) >> 12 // chroma + : (min(src_, 30189) * 19077 - 39057361) >> 14; // luma + + // Dither replacement + dst_ = dst_ + 64; + + // Back to 8 bit + dst_ = clamp(dst_ >> 7, jpeg_min[comp_id], jpeg_max[comp_id]); + dst[x + y * pitch] = static_cast<unsigned char>(dst_); +} + +__global__ void to_mpeg_cuda(const unsigned char* src, unsigned char* dst, + int pitch, int comp_id) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int src_, dst_; + + // 8 bit -> 15 bit for better precision + src_ = static_cast<int>(src[x + y * pitch]) << 7; + + // Conversion + dst_ = comp_id ? (src_ * 1799 + 4081085) >> 11 // chroma + : (src_ * 14071 + 33561947) >> 14; // luma + + // Dither replacement + dst_ = dst_ + 64; + + // Back to 8 bit + dst_ = clamp(dst_ >> 7, mpeg_min[comp_id], mpeg_max[comp_id]); + dst[x + y * pitch] = static_cast<unsigned char>(dst_); +} + +} |