lavc/hevc*: move to hevc/ subdir

author: Anton Khirnov <anton@khirnov.net> 2024-05-29 09:50:48 +0200
committer: Anton Khirnov <anton@khirnov.net> 2024-06-04 11:46:27 +0200
commit: e4601cc3390eec6ccbfc1139bdd102b4e801ae80 (patch)
tree: 3f31fad838457e80f3f922bc597ea85ab2c81319 /libavcodec/hevc
parent: ba56a300a94bdf5520ac1324a8e7fbaeea430904 (diff)
download: ffmpeg-e4601cc3390eec6ccbfc1139bdd102b4e801ae80.tar.gz
24 files changed, 14077 insertions, 0 deletions
diff --git a/libavcodec/hevc/Makefile b/libavcodec/hevc/Makefile
new file mode 100644
index 0000000000..9c385ef3da
--- /dev/null
+++ b/libavcodec/hevc/Makefile
@@ -0,0 +1,36 @@
+# Extend the clean target: delete files matching each CLEANSUFFIXES pattern
+# under libavcodec/hevc/.
+clean::
+	$(RM) $(CLEANSUFFIXES:%=libavcodec/hevc/%)
+
+# Objects appended for CONFIG_HEVC_DECODER. Entries without the hevc/ prefix
+# presumably live in the parent libavcodec/ directory -- confirm against the
+# including Makefile.
+OBJS-$(CONFIG_HEVC_DECODER) += \
+    aom_film_grain.o           \
+    h274.o                     \
+    hevc/cabac.o               \
+    hevc/data.o                \
+    hevc/dsp.o                 \
+    hevc/filter.o              \
+    hevc/hevcdec.o             \
+    hevc/mvs.o                 \
+    hevc/pred.o                \
+    hevc/refs.o                \
+
+# Objects appended for CONFIG_HEVC_PARSER.
+OBJS-$(CONFIG_HEVC_PARSER) += \
+    hevc/parser.o             \
+
+
+# Objects appended for CONFIG_HEVCPARSE.
+OBJS-$(CONFIG_HEVCPARSE) += \
+    h2645data.o             \
+    h2645_parse.o           \
+    h2645_vui.o             \
+    hevc/data.o             \
+    hevc/parse.o            \
+    hevc/ps.o               \
+
+
+# Objects appended for CONFIG_HEVC_SEI.
+OBJS-$(CONFIG_HEVC_SEI) +=  \
+    hevc/sei.o              \
+    h2645_sei.o             \
+    dynamic_hdr_vivid.o     \
+    aom_film_grain.o        \
+
+
+# Target-specific flag: every object under libavcodec/hevc/ is compiled with
+# $(SRC_PATH)/libavcodec/ added to the include path (presumably so sources in
+# this subdirectory can include parent-directory headers by bare name).
+libavcodec/hevc/%.o: CPPFLAGS += -I$(SRC_PATH)/libavcodec/
diff --git a/libavcodec/hevc/cabac.c b/libavcodec/hevc/cabac.c
new file mode 100644
index 0000000000..37f144758a
--- /dev/null
+++ b/libavcodec/hevc/cabac.c
@@ -0,0 +1,1514 @@
+/*
+ * HEVC CABAC decoding
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2012 - 2013 Gildas Cocherel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+
+#include "cabac_functions.h"
+#include "data.h"
+#include "hevc.h"
+#include "hevcdec.h"
+
+/* Upper bound on the unary bypass prefixes read by mvd_decode() and
+ * coeff_abs_level_remaining_decode(); both log an error and return 0 when a
+ * prefix reaches this length. */
+#define CABAC_MAX_BIN 31
+
+/*
+ * X-macro listing the CABAC syntax elements as ELEM(NAME, NUM_BINS).
+ *
+ * When expanded into the *_OFFSET enum that follows, NUM_BINS is the number
+ * of consecutive context indices reserved for NAME. Elements listed with 0
+ * reserve none; the decode functions visible in this file read those with
+ * get_cabac_bypass() or get_cabac_terminate() instead of GET_CABAC().
+ */
+#define CABAC_ELEMS(ELEM)                     \
+    ELEM(SAO_MERGE_FLAG, 1)                   \
+    ELEM(SAO_TYPE_IDX, 1)                     \
+    ELEM(SAO_EO_CLASS, 0)                     \
+    ELEM(SAO_BAND_POSITION, 0)                \
+    ELEM(SAO_OFFSET_ABS, 0)                   \
+    ELEM(SAO_OFFSET_SIGN, 0)                  \
+    ELEM(END_OF_SLICE_FLAG, 0)                \
+    ELEM(SPLIT_CODING_UNIT_FLAG, 3)           \
+    ELEM(CU_TRANSQUANT_BYPASS_FLAG, 1)        \
+    ELEM(SKIP_FLAG, 3)                        \
+    ELEM(CU_QP_DELTA, 3)                      \
+    ELEM(PRED_MODE_FLAG, 1)                   \
+    ELEM(PART_MODE, 4)                        \
+    ELEM(PCM_FLAG, 0)                         \
+    ELEM(PREV_INTRA_LUMA_PRED_FLAG, 1)        \
+    ELEM(MPM_IDX, 0)                          \
+    ELEM(REM_INTRA_LUMA_PRED_MODE, 0)         \
+    ELEM(INTRA_CHROMA_PRED_MODE, 2)           \
+    ELEM(MERGE_FLAG, 1)                       \
+    ELEM(MERGE_IDX, 1)                        \
+    ELEM(INTER_PRED_IDC, 5)                   \
+    ELEM(REF_IDX_L0, 2)                       \
+    ELEM(REF_IDX_L1, 2)                       \
+    ELEM(ABS_MVD_GREATER0_FLAG, 2)            \
+    ELEM(ABS_MVD_GREATER1_FLAG, 2)            \
+    ELEM(ABS_MVD_MINUS2, 0)                   \
+    ELEM(MVD_SIGN_FLAG, 0)                    \
+    ELEM(MVP_LX_FLAG, 1)                      \
+    ELEM(NO_RESIDUAL_DATA_FLAG, 1)            \
+    ELEM(SPLIT_TRANSFORM_FLAG, 3)             \
+    ELEM(CBF_LUMA, 2)                         \
+    ELEM(CBF_CB_CR, 5)                        \
+    ELEM(TRANSFORM_SKIP_FLAG, 2)              \
+    ELEM(EXPLICIT_RDPCM_FLAG, 2)              \
+    ELEM(EXPLICIT_RDPCM_DIR_FLAG, 2)          \
+    ELEM(LAST_SIGNIFICANT_COEFF_X_PREFIX, 18) \
+    ELEM(LAST_SIGNIFICANT_COEFF_Y_PREFIX, 18) \
+    ELEM(LAST_SIGNIFICANT_COEFF_X_SUFFIX, 0)  \
+    ELEM(LAST_SIGNIFICANT_COEFF_Y_SUFFIX, 0)  \
+    ELEM(SIGNIFICANT_COEFF_GROUP_FLAG, 4)     \
+    ELEM(SIGNIFICANT_COEFF_FLAG, 44)          \
+    ELEM(COEFF_ABS_LEVEL_GREATER1_FLAG, 24)   \
+    ELEM(COEFF_ABS_LEVEL_GREATER2_FLAG, 6)    \
+    ELEM(COEFF_ABS_LEVEL_REMAINING, 0)        \
+    ELEM(COEFF_SIGN_FLAG, 0)                  \
+    ELEM(LOG2_RES_SCALE_ABS, 8)               \
+    ELEM(RES_SCALE_SIGN_FLAG, 2)              \
+    ELEM(CU_CHROMA_QP_OFFSET_FLAG, 1)         \
+    ELEM(CU_CHROMA_QP_OFFSET_IDX, 1)          \
+
+/**
+ * Offset to ctxIdx 0 in init_values and states.
+ *
+ * For every ELEM(NAME, NUM_BINS) this defines NAME_OFFSET, the index of the
+ * element's first context, and NAME_END = NAME_OFFSET + NUM_BINS - 1, the
+ * index of its last one. An enumerator without an initializer is the
+ * previous one plus 1, so the next element's offset follows directly after
+ * NAME_END; an element with NUM_BINS == 0 therefore occupies no index.
+ */
+enum {
+#define OFFSET(NAME, NUM_BINS)                     \
+    NAME ## _OFFSET,                               \
+    NAME ## _END = NAME ## _OFFSET + NUM_BINS - 1,
+CABAC_ELEMS(OFFSET)
+};
+
+/* Filler value; in this table it appears only in the first (init_type 0)
+ * row. Presumably stands for "context not used" -- TODO confirm. */
+#define CNU 154
+/**
+ * Indexed by init_type, then by context index; the per-element comments
+ * follow the order of the elements in CABAC_ELEMS that have contexts.
+ */
+static const uint8_t init_values[3][HEVC_CONTEXTS] = {
+    { // sao_merge_flag
+      153,
+      // sao_type_idx
+      200,
+      // split_coding_unit_flag
+      139, 141, 157,
+      // cu_transquant_bypass_flag
+      154,
+      // skip_flag
+      CNU, CNU, CNU,
+      // cu_qp_delta
+      154, 154, 154,
+      // pred_mode_flag
+      CNU,
+      // part_mode
+      184, CNU, CNU, CNU,
+      // prev_intra_luma_pred_flag
+      184,
+      // intra_chroma_pred_mode
+      63, 139,
+      // merge_flag
+      CNU,
+      // merge_idx
+      CNU,
+      // inter_pred_idc
+      CNU, CNU, CNU, CNU, CNU,
+      // ref_idx_l0
+      CNU, CNU,
+      // ref_idx_l1
+      CNU, CNU,
+      // abs_mvd_greater0_flag
+      CNU, CNU,
+      // abs_mvd_greater1_flag
+      CNU, CNU,
+      // mvp_lx_flag
+      CNU,
+      // no_residual_data_flag
+      CNU,
+      // split_transform_flag
+      153, 138, 138,
+      // cbf_luma
+      111, 141,
+      // cbf_cb, cbf_cr
+      94, 138, 182, 154, 154,
+      // transform_skip_flag
+      139, 139,
+      // explicit_rdpcm_flag
+      139, 139,
+      // explicit_rdpcm_dir_flag
+      139, 139,
+      // last_significant_coeff_x_prefix
+      110, 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
+       79, 108, 123,  63,
+      // last_significant_coeff_y_prefix
+      110, 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
+       79, 108, 123,  63,
+      // significant_coeff_group_flag
+      91, 171, 134, 141,
+      // significant_coeff_flag
+      111, 111, 125, 110, 110,  94, 124, 108, 124, 107, 125, 141, 179, 153,
+      125, 107, 125, 141, 179, 153, 125, 107, 125, 141, 179, 153, 125, 140,
+      139, 182, 182, 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111,
+      141, 111,
+      // coeff_abs_level_greater1_flag
+      140,  92, 137, 138, 140, 152, 138, 139, 153,  74, 149,  92, 139, 107,
+      122, 152, 140, 179, 166, 182, 140, 227, 122, 197,
+      // coeff_abs_level_greater2_flag
+      138, 153, 136, 167, 152, 152,
+      // log2_res_scale_abs
+      154, 154, 154, 154, 154, 154, 154, 154,
+      // res_scale_sign_flag
+      154, 154,
+      // cu_chroma_qp_offset_flag
+      154,
+      // cu_chroma_qp_offset_idx
+      154,
+    },
+    { // sao_merge_flag
+      153,
+      // sao_type_idx
+      185,
+      // split_coding_unit_flag
+      107, 139, 126,
+      // cu_transquant_bypass_flag
+      154,
+      // skip_flag
+      197, 185, 201,
+      // cu_qp_delta
+      154, 154, 154,
+      // pred_mode_flag
+      149,
+      // part_mode
+      154, 139, 154, 154,
+      // prev_intra_luma_pred_flag
+      154,
+      // intra_chroma_pred_mode
+      152, 139,
+      // merge_flag
+      110,
+      // merge_idx
+      122,
+      // inter_pred_idc
+      95, 79, 63, 31, 31,
+      // ref_idx_l0
+      153, 153,
+      // ref_idx_l1
+      153, 153,
+      // abs_mvd_greater0_flag
+      140, 198,
+      // abs_mvd_greater1_flag
+      140, 198,
+      // mvp_lx_flag
+      168,
+      // no_residual_data_flag
+      79,
+      // split_transform_flag
+      124, 138, 94,
+      // cbf_luma
+      153, 111,
+      // cbf_cb, cbf_cr
+      149, 107, 167, 154, 154,
+      // transform_skip_flag
+      139, 139,
+      // explicit_rdpcm_flag
+      139, 139,
+      // explicit_rdpcm_dir_flag
+      139, 139,
+      // last_significant_coeff_x_prefix
+      125, 110,  94, 110,  95,  79, 125, 111, 110,  78, 110, 111, 111,  95,
+       94, 108, 123, 108,
+      // last_significant_coeff_y_prefix
+      125, 110,  94, 110,  95,  79, 125, 111, 110,  78, 110, 111, 111,  95,
+       94, 108, 123, 108,
+      // significant_coeff_group_flag
+      121, 140, 61, 154,
+      // significant_coeff_flag
+      155, 154, 139, 153, 139, 123, 123,  63, 153, 166, 183, 140, 136, 153,
+      154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170,
+      153, 123, 123, 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140,
+      140, 140,
+      // coeff_abs_level_greater1_flag
+      154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121,
+      136, 137, 169, 194, 166, 167, 154, 167, 137, 182,
+      // coeff_abs_level_greater2_flag
+      107, 167, 91, 122, 107, 167,
+      // log2_res_scale_abs
+      154, 154, 154, 154, 154, 154, 154, 154,
+      // res_scale_sign_flag
+      154, 154,
+      // cu_chroma_qp_offset_flag
+      154,
+      // cu_chroma_qp_offset_idx
+      154,
+    },
+    { // sao_merge_flag
+      153,
+      // sao_type_idx
+      160,
+      // split_coding_unit_flag
+      107, 139, 126,
+      // cu_transquant_bypass_flag
+      154,
+      // skip_flag
+      197, 185, 201,
+      // cu_qp_delta
+      154, 154, 154,
+      // pred_mode_flag
+      134,
+      // part_mode
+      154, 139, 154, 154,
+      // prev_intra_luma_pred_flag
+      183,
+      // intra_chroma_pred_mode
+      152, 139,
+      // merge_flag
+      154,
+      // merge_idx
+      137,
+      // inter_pred_idc
+      95, 79, 63, 31, 31,
+      // ref_idx_l0
+      153, 153,
+      // ref_idx_l1
+      153, 153,
+      // abs_mvd_greater0_flag
+      169, 198,
+      // abs_mvd_greater1_flag
+      169, 198,
+      // mvp_lx_flag
+      168,
+      // no_residual_data_flag
+      79,
+      // split_transform_flag
+      224, 167, 122,
+      // cbf_luma
+      153, 111,
+      // cbf_cb, cbf_cr
+      149, 92, 167, 154, 154,
+      // transform_skip_flag
+      139, 139,
+      // explicit_rdpcm_flag
+      139, 139,
+      // explicit_rdpcm_dir_flag
+      139, 139,
+      // last_significant_coeff_x_prefix
+      125, 110, 124, 110,  95,  94, 125, 111, 111,  79, 125, 126, 111, 111,
+       79, 108, 123,  93,
+      // last_significant_coeff_y_prefix
+      125, 110, 124, 110,  95,  94, 125, 111, 111,  79, 125, 126, 111, 111,
+       79, 108, 123,  93,
+      // significant_coeff_group_flag
+      121, 140, 61, 154,
+      // significant_coeff_flag
+      170, 154, 139, 153, 139, 123, 123,  63, 124, 166, 183, 140, 136, 153,
+      154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170,
+      153, 138, 138, 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140,
+      140, 140,
+      // coeff_abs_level_greater1_flag
+      154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121,
+      136, 122, 169, 208, 166, 167, 154, 152, 167, 182,
+      // coeff_abs_level_greater2_flag
+      107, 167, 91, 107, 107, 167,
+      // log2_res_scale_abs
+      154, 154, 154, 154, 154, 154, 154, 154,
+      // res_scale_sign_flag
+      154, 154,
+      // cu_chroma_qp_offset_flag
+      154,
+      // cu_chroma_qp_offset_idx
+      154,
+    },
+};
+
+/*
+ * Scan-order lookup tables.
+ *
+ * The *_x / *_y tables give, for scan position n, a coordinate pair
+ * (x[n], y[n]). The *_inv tables are indexed [y][x] and yield the scan
+ * position; for the 2x2 diagonal scan this is visible directly, since
+ * diag_scan2x2_inv[diag_scan2x2_y[n]][diag_scan2x2_x[n]] == n for n = 0..3.
+ * The larger tables presumably follow the same convention -- confirm against
+ * their users.
+ */
+
+/* Single-entry scan: the only position is 0. */
+static const uint8_t scan_1x1[1] = {
+    0,
+};
+
+/* Horizontal (row by row) scan of a 2x2 block: x coordinates. */
+static const uint8_t horiz_scan2x2_x[4] = {
+    0, 1, 0, 1,
+};
+
+/* Horizontal scan of a 2x2 block: y coordinates. */
+static const uint8_t horiz_scan2x2_y[4] = {
+    0, 0, 1, 1
+};
+
+/* Horizontal scan of a 4x4 block: x coordinates. */
+static const uint8_t horiz_scan4x4_x[16] = {
+    0, 1, 2, 3,
+    0, 1, 2, 3,
+    0, 1, 2, 3,
+    0, 1, 2, 3,
+};
+
+/* Horizontal scan of a 4x4 block: y coordinates. */
+static const uint8_t horiz_scan4x4_y[16] = {
+    0, 0, 0, 0,
+    1, 1, 1, 1,
+    2, 2, 2, 2,
+    3, 3, 3, 3,
+};
+
+/* 8x8 horizontal scan, coordinate to position. The values advance row by row
+ * inside each 4x4 quadrant, quadrants ordered left-to-right then
+ * top-to-bottom. */
+static const uint8_t horiz_scan8x8_inv[8][8] = {
+    {  0,  1,  2,  3, 16, 17, 18, 19, },
+    {  4,  5,  6,  7, 20, 21, 22, 23, },
+    {  8,  9, 10, 11, 24, 25, 26, 27, },
+    { 12, 13, 14, 15, 28, 29, 30, 31, },
+    { 32, 33, 34, 35, 48, 49, 50, 51, },
+    { 36, 37, 38, 39, 52, 53, 54, 55, },
+    { 40, 41, 42, 43, 56, 57, 58, 59, },
+    { 44, 45, 46, 47, 60, 61, 62, 63, },
+};
+
+/* Diagonal scan of a 2x2 block: x coordinates. */
+static const uint8_t diag_scan2x2_x[4] = {
+    0, 0, 1, 1,
+};
+
+/* Diagonal scan of a 2x2 block: y coordinates. */
+static const uint8_t diag_scan2x2_y[4] = {
+    0, 1, 0, 1,
+};
+
+/* Diagonal scan of a 2x2 block, [y][x] to position. */
+static const uint8_t diag_scan2x2_inv[2][2] = {
+    { 0, 2, },
+    { 1, 3, },
+};
+
+/* Diagonal scan of a 4x4 block, coordinate to position. */
+static const uint8_t diag_scan4x4_inv[4][4] = {
+    { 0,  2,  5,  9, },
+    { 1,  4,  8, 12, },
+    { 3,  7, 11, 14, },
+    { 6, 10, 13, 15, },
+};
+
+/* Diagonal scan of an 8x8 block, coordinate to position. */
+static const uint8_t diag_scan8x8_inv[8][8] = {
+    {  0,  2,  5,  9, 14, 20, 27, 35, },
+    {  1,  4,  8, 13, 19, 26, 34, 42, },
+    {  3,  7, 12, 18, 25, 33, 41, 48, },
+    {  6, 11, 17, 24, 32, 40, 47, 53, },
+    { 10, 16, 23, 31, 39, 46, 52, 57, },
+    { 15, 22, 30, 38, 45, 51, 56, 60, },
+    { 21, 29, 37, 44, 50, 55, 59, 62, },
+    { 28, 36, 43, 49, 54, 58, 61, 63, },
+};
+
+/**
+ * Snapshot the CABAC context states of lc into lc->common_cabac_state.
+ *
+ * Nothing is saved unless entropy_coding_sync_enabled_flag is set and
+ * ctb_addr_ts % ctb_width equals 2 (or equals 0 when ctb_width is exactly 2).
+ * stat_coeff is copied as well when persistent_rice_adaptation_enabled is set.
+ */
+void ff_hevc_save_states(HEVCLocalContext *lc, int ctb_addr_ts)
+{
+    const HEVCContext *const s = lc->parent;
+    const HEVCSPS *sps;
+    int rem;
+
+    if (!s->ps.pps->entropy_coding_sync_enabled_flag)
+        return;
+
+    sps = s->ps.sps;
+    rem = ctb_addr_ts % sps->ctb_width;
+    if (rem != 2 && !(sps->ctb_width == 2 && rem == 0))
+        return;
+
+    memcpy(lc->common_cabac_state->state, lc->cabac_state, HEVC_CONTEXTS);
+    if (sps->persistent_rice_adaptation_enabled)
+        memcpy(lc->common_cabac_state->stat_coeff, lc->stat_coeff, HEVC_STAT_COEFFS);
+}
+
+/**
+ * Restore the context states of lc from lc->common_cabac_state; the inverse
+ * copy of what ff_hevc_save_states() stores. stat_coeff is restored only
+ * when persistent_rice_adaptation_enabled is set.
+ */
+static void load_states(HEVCLocalContext *lc, const HEVCContext *s)
+{
+    memcpy(lc->cabac_state, lc->common_cabac_state->state, HEVC_CONTEXTS);
+    if (s->ps.sps->persistent_rice_adaptation_enabled) {
+        memcpy(lc->stat_coeff, lc->common_cabac_state->stat_coeff, HEVC_STAT_COEFFS);
+    }
+}
+
+/**
+ * Call skip_bytes() on the CABAC context of lc with a count of 0.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA when skip_bytes() returns NULL
+ */
+static int cabac_reinit(HEVCLocalContext *lc)
+{
+    if (skip_bytes(&lc->cc, 0) == NULL)
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
+/**
+ * Reset every CABAC context state of lc from the init_values table.
+ *
+ * The table row is 2 - slice_type; when cabac_init_flag is set on a non-I
+ * slice the row index is XORed with 3, which swaps rows 1 and 2. Each state
+ * is derived from its init value and the slice QP clipped to [0, 51].
+ * All HEVC_STAT_COEFFS entries of stat_coeff are cleared as well.
+ */
+static void cabac_init_state(HEVCLocalContext *lc, const HEVCContext *s)
+{
+    /* loop-invariant, so clip once instead of once per context */
+    const int slice_qp = av_clip(s->sh.slice_qp, 0, 51);
+    int init_type = 2 - s->sh.slice_type;
+    int i;
+
+    if (s->sh.cabac_init_flag && s->sh.slice_type != HEVC_SLICE_I)
+        init_type ^= 3;
+
+    for (i = 0; i < HEVC_CONTEXTS; i++) {
+        int init_value = init_values[init_type][i];
+        /* high nibble -> slope m, low nibble -> offset n */
+        int m = (init_value >> 4) * 5 - 45;
+        int n = ((init_value & 15) << 3) - 16;
+        int pre = 2 * (((m * slice_qp) >> 4) + n) - 127;
+
+        /* replace a negative pre by ~pre (relies on an arithmetic right
+         * shift of a 32-bit int) */
+        pre ^= pre >> 31;
+        if (pre > 124)
+            pre = 124 + (pre & 1);
+        lc->cabac_state[i] = pre;
+    }
+
+    /* same element count that the save/load helpers copy for stat_coeff */
+    for (i = 0; i < HEVC_STAT_COEFFS; i++)
+        lc->stat_coeff[i] = 0;
+}
+
+/**
+ * Prepare the CABAC decoder and context states of lc for the CTB at
+ * tile-scan address ctb_addr_ts.
+ *
+ * @param data, size  buffer handed to ff_init_cabac_decoder() whenever the
+ *                    decoder is (re)started from a buffer
+ * @return 0 on success, otherwise the negative value returned by
+ *         ff_init_cabac_decoder() or cabac_reinit()
+ */
+int ff_hevc_cabac_init(HEVCLocalContext *lc, int ctb_addr_ts,
+                       const uint8_t *data, size_t size)
+{
+    const HEVCContext *const s = lc->parent;
+
+    /* CTB whose address is the one slice_ctb_addr_rs maps to: start the
+     * decoder on the supplied buffer */
+    if (ctb_addr_ts == s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]) {
+        int ret = ff_init_cabac_decoder(&lc->cc, data, size);
+        if (ret < 0)
+            return ret;
+        /* reset the contexts unless this is a dependent slice segment that
+         * stays within the tile of the previous CTB */
+        if (s->sh.dependent_slice_segment_flag == 0 ||
+            (s->ps.pps->tiles_enabled_flag &&
+             s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]))
+            cabac_init_state(lc, s);
+
+        /* with entropy coding sync, when ctb_addr_ts is a multiple of
+         * ctb_width: a one-CTB-wide picture resets the contexts, otherwise a
+         * dependent segment takes over the saved states */
+        if (!s->sh.first_slice_in_pic_flag &&
+            s->ps.pps->entropy_coding_sync_enabled_flag) {
+            if (ctb_addr_ts % s->ps.sps->ctb_width == 0) {
+                if (s->ps.sps->ctb_width == 1)
+                    cabac_init_state(lc, s);
+                else if (s->sh.dependent_slice_segment_flag == 1)
+                    load_states(lc, s);
+            }
+        }
+    } else {
+        /* tile id differs from the previous CTB: restart the decoder and
+         * reset the contexts */
+        if (s->ps.pps->tiles_enabled_flag &&
+            s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
+            int ret;
+            /* single-threaded: continue in the current buffer via
+             * cabac_reinit(); otherwise start from data/size */
+            if (s->threads_number == 1)
+                ret = cabac_reinit(lc);
+            else {
+                ret = ff_init_cabac_decoder(&lc->cc, data, size);
+            }
+            if (ret < 0)
+                return ret;
+            cabac_init_state(lc, s);
+        }
+        if (s->ps.pps->entropy_coding_sync_enabled_flag) {
+            if (ctb_addr_ts % s->ps.sps->ctb_width == 0) {
+                int ret;
+                /* the terminate bin is read but its value is not used */
+                get_cabac_terminate(&lc->cc);
+                if (s->threads_number == 1)
+                    ret = cabac_reinit(lc);
+                else {
+                    ret = ff_init_cabac_decoder(&lc->cc, data, size);
+                }
+                if (ret < 0)
+                    return ret;
+
+                /* one-CTB-wide picture: nothing was saved to load, so reset;
+                 * otherwise take over the saved states */
+                if (s->ps.sps->ctb_width == 1)
+                    cabac_init_state(lc, s);
+                else
+                    load_states(lc, s);
+            }
+        }
+    }
+    return 0;
+}
+
+/* Read one bin with get_cabac() using the context state cabac_state[ctx].
+ * The expansion refers to a HEVCLocalContext pointer named lc, which must be
+ * in scope at the point of use. */
+#define GET_CABAC(ctx)  get_cabac(&lc->cc, &lc->cabac_state[ctx])
+
+/** Read one bin with the context at SAO_MERGE_FLAG_OFFSET and return it. */
+int ff_hevc_sao_merge_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(SAO_MERGE_FLAG_OFFSET);
+}
+
+/**
+ * Read the SAO type: a context bin first and, only if it is set, one bypass
+ * bin that selects between SAO_BAND (0) and SAO_EDGE (1).
+ *
+ * @return 0 when the first bin is clear, otherwise SAO_BAND or SAO_EDGE
+ */
+int ff_hevc_sao_type_idx_decode(HEVCLocalContext *lc)
+{
+    if (GET_CABAC(SAO_TYPE_IDX_OFFSET))
+        return get_cabac_bypass(&lc->cc) ? SAO_EDGE : SAO_BAND;
+
+    return 0;
+}
+
+/** Read five bypass bins, most significant first, and return them as one value. */
+int ff_hevc_sao_band_position_decode(HEVCLocalContext *lc)
+{
+    int position = 0;
+    int bins_left;
+
+    for (bins_left = 5; bins_left > 0; bins_left--)
+        position = (position << 1) | get_cabac_bypass(&lc->cc);
+
+    return position;
+}
+
+/**
+ * Count set bypass bins up to a limit of
+ * (1 << (min(bit_depth, 10) - 5)) - 1; reading stops at the first clear bin
+ * or once the limit is reached.
+ */
+int ff_hevc_sao_offset_abs_decode(HEVCLocalContext *lc)
+{
+    const int limit = (1 << (FFMIN(lc->parent->ps.sps->bit_depth, 10) - 5)) - 1;
+    int count;
+
+    for (count = 0; count < limit; count++) {
+        if (!get_cabac_bypass(&lc->cc))
+            break;
+    }
+    return count;
+}
+
+/** Read one bypass bin and return it. */
+int ff_hevc_sao_offset_sign_decode(HEVCLocalContext *lc)
+{
+    return get_cabac_bypass(&lc->cc);
+}
+
+/** Read two bypass bins, most significant first, and return the 2-bit value. */
+int ff_hevc_sao_eo_class_decode(HEVCLocalContext *lc)
+{
+    /* separate statements keep the read order explicit */
+    const int high = get_cabac_bypass(&lc->cc);
+    const int low  = get_cabac_bypass(&lc->cc);
+
+    return (high << 1) | low;
+}
+
+/** Return the result of get_cabac_terminate() on the CABAC context of lc. */
+int ff_hevc_end_of_slice_flag_decode(HEVCLocalContext *lc)
+{
+    return get_cabac_terminate(&lc->cc);
+}
+
+/** Read one bin with the context at CU_TRANSQUANT_BYPASS_FLAG_OFFSET. */
+int ff_hevc_cu_transquant_bypass_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(CU_TRANSQUANT_BYPASS_FLAG_OFFSET);
+}
+
+/**
+ * Read the skip flag. The context increment is the number of neighbours
+ * (left and above) that are both available and have a non-zero entry in
+ * s->skip_flag. A neighbour counts as available when the matching
+ * ctb_left_flag / ctb_up_flag is set or when x0 / y0 is not at offset 0
+ * within its CTB.
+ */
+int ff_hevc_skip_flag_decode(HEVCLocalContext *lc, int x0, int y0, int x_cb, int y_cb)
+{
+    const HEVCContext *const s = lc->parent;
+    /* not referenced by name below; kept because SAMPLE_CTB() presumably
+     * expands to an expression using min_cb_width -- TODO confirm */
+    int min_cb_width = s->ps.sps->min_cb_width;
+    const int left_avail = lc->ctb_left_flag ||
+                           av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
+    const int up_avail   = lc->ctb_up_flag ||
+                           av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
+    int ctx_inc = 0;
+
+    if (left_avail && SAMPLE_CTB(s->skip_flag, x_cb - 1, y_cb))
+        ctx_inc++;
+    if (up_avail && SAMPLE_CTB(s->skip_flag, x_cb, y_cb - 1))
+        ctx_inc++;
+
+    return GET_CABAC(SKIP_FLAG_OFFSET + ctx_inc);
+}
+
+/**
+ * Read the absolute QP delta: a context-coded unary prefix of at most 5 bins
+ * (first bin with context +0, later bins with context +1) and, when the
+ * prefix is 5, a bypass-coded suffix made of a unary part of fewer than 7
+ * set bins followed by that many extra bins.
+ *
+ * @return prefix + suffix, or AVERROR_INVALIDDATA when the unary part of the
+ *         suffix reaches 7 set bins
+ */
+int ff_hevc_cu_qp_delta_abs(HEVCLocalContext *lc)
+{
+    int prefix = 0;
+    int suffix = 0;
+    int k;
+
+    while (prefix < 5 && GET_CABAC(CU_QP_DELTA_OFFSET + (prefix != 0)))
+        prefix++;
+
+    if (prefix < 5)
+        return prefix;
+
+    for (k = 0; k < 7; k++) {
+        if (!get_cabac_bypass(&lc->cc))
+            break;
+        suffix += 1 << k;
+    }
+    if (k == 7) {
+        av_log(lc->logctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (k = k - 1; k >= 0; k--)
+        suffix += get_cabac_bypass(&lc->cc) << k;
+
+    return prefix + suffix;
+}
+
+/** Read one bypass bin and return it. */
+int ff_hevc_cu_qp_delta_sign_flag(HEVCLocalContext *lc)
+{
+    return get_cabac_bypass(&lc->cc);
+}
+
+/** Read one bin with the context at CU_CHROMA_QP_OFFSET_FLAG_OFFSET. */
+int ff_hevc_cu_chroma_qp_offset_flag(HEVCLocalContext *lc)
+{
+    return GET_CABAC(CU_CHROMA_QP_OFFSET_FLAG_OFFSET);
+}
+
+/**
+ * Read cu_chroma_qp_offset_idx as a truncated unary code: count set bins of
+ * the context at CU_CHROMA_QP_OFFSET_IDX_OFFSET until a clear bin is read or
+ * the maximum is reached.
+ *
+ * The maximum is chroma_qp_offset_list_len_minus1 (the cMax given for this
+ * element in the H.265 binarization), additionally capped at 5. Taking the
+ * larger of the two values instead would keep reading bins past the end of
+ * the code whenever the signalled list is shorter than 6 entries.
+ *
+ * @return the decoded index, in [0, min(5, chroma_qp_offset_list_len_minus1)]
+ */
+int ff_hevc_cu_chroma_qp_offset_idx(HEVCLocalContext *lc)
+{
+    int c_max = FFMIN(5, lc->parent->ps.pps->chroma_qp_offset_list_len_minus1);
+    int i = 0;
+
+    while (i < c_max && GET_CABAC(CU_CHROMA_QP_OFFSET_IDX_OFFSET))
+        i++;
+
+    return i;
+}
+
+/** Read one bin with the context at PRED_MODE_FLAG_OFFSET. */
+int ff_hevc_pred_mode_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(PRED_MODE_FLAG_OFFSET);
+}
+
+/**
+ * Read the coding-unit split flag. The context increment counts how many of
+ * the left and above neighbours have a tab_ct_depth entry greater than
+ * ct_depth; a neighbour that is not available contributes a depth of 0.
+ */
+int ff_hevc_split_coding_unit_flag_decode(HEVCLocalContext *lc, int ct_depth, int x0, int y0)
+{
+    const HEVCContext *const s = lc->parent;
+    const HEVCSPS *const sps = s->ps.sps;
+    const int x_cb = x0 >> sps->log2_min_cb_size;
+    const int y_cb = y0 >> sps->log2_min_cb_size;
+    int left_depth  = 0;
+    int above_depth = 0;
+
+    if (lc->ctb_left_flag || av_mod_uintp2(x0, sps->log2_ctb_size))
+        left_depth = s->tab_ct_depth[y_cb * sps->min_cb_width + x_cb - 1];
+    if (lc->ctb_up_flag || av_mod_uintp2(y0, sps->log2_ctb_size))
+        above_depth = s->tab_ct_depth[(y_cb - 1) * sps->min_cb_width + x_cb];
+
+    return GET_CABAC(SPLIT_CODING_UNIT_FLAG_OFFSET +
+                     (left_depth > ct_depth) + (above_depth > ct_depth));
+}
+
+/**
+ * Read the partition mode of the current coding unit.
+ *
+ * The trailing comments give the bin string consumed so far on each path.
+ * Which bins are read depends on whether log2_cb_size equals the SPS
+ * log2_min_cb_size, on lc->cu.pred_mode and on the SPS amp_enabled field.
+ *
+ * @return one of the PART_* values
+ */
+int ff_hevc_part_mode_decode(HEVCLocalContext *lc, int log2_cb_size)
+{
+    if (GET_CABAC(PART_MODE_OFFSET)) // 1
+        return PART_2Nx2N;
+    /* coding block of the minimum size */
+    if (log2_cb_size == lc->parent->ps.sps->log2_min_cb_size) {
+        if (lc->cu.pred_mode == MODE_INTRA) // 0
+            return PART_NxN;
+        if (GET_CABAC(PART_MODE_OFFSET + 1)) // 01
+            return PART_2NxN;
+        /* for log2_cb_size 3 no further bin is read */
+        if (log2_cb_size == 3) // 00
+            return PART_Nx2N;
+        if (GET_CABAC(PART_MODE_OFFSET + 2)) // 001
+            return PART_Nx2N;
+        return PART_NxN; // 000
+    }
+
+    /* larger block, amp_enabled clear: one more bin picks 2NxN or Nx2N */
+    if (!lc->parent->ps.sps->amp_enabled) {
+        if (GET_CABAC(PART_MODE_OFFSET + 1)) // 01
+            return PART_2NxN;
+        return PART_Nx2N;
+    }
+
+    /* amp_enabled set: a context bin, then a bypass bin on the remaining paths */
+    if (GET_CABAC(PART_MODE_OFFSET + 1)) { // 01X, 01XX
+        if (GET_CABAC(PART_MODE_OFFSET + 3)) // 011
+            return PART_2NxN;
+        if (get_cabac_bypass(&lc->cc)) // 0101
+            return PART_2NxnD;
+        return PART_2NxnU; // 0100
+    }
+
+    if (GET_CABAC(PART_MODE_OFFSET + 3)) // 001
+        return PART_Nx2N;
+    if (get_cabac_bypass(&lc->cc)) // 0001
+        return PART_nRx2N;
+    return PART_nLx2N;  // 0000
+}
+
+/** Return the result of get_cabac_terminate() on the CABAC context of lc. */
+int ff_hevc_pcm_flag_decode(HEVCLocalContext *lc)
+{
+    return get_cabac_terminate(&lc->cc);
+}
+
+/** Read one bin with the context at PREV_INTRA_LUMA_PRED_FLAG_OFFSET. */
+int ff_hevc_prev_intra_luma_pred_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(PREV_INTRA_LUMA_PRED_FLAG_OFFSET);
+}
+
+/**
+ * Count set bypass bins, reading at most two and stopping at the first clear
+ * one.
+ *
+ * @return 0, 1 or 2
+ */
+int ff_hevc_mpm_idx_decode(HEVCLocalContext *lc)
+{
+    int idx;
+
+    for (idx = 0; idx < 2; idx++) {
+        if (!get_cabac_bypass(&lc->cc))
+            break;
+    }
+    return idx;
+}
+
+/** Read five bypass bins, most significant first, and return them as one value. */
+int ff_hevc_rem_intra_luma_pred_mode_decode(HEVCLocalContext *lc)
+{
+    int mode = 0;
+    int bins_left;
+
+    for (bins_left = 5; bins_left > 0; bins_left--)
+        mode = (mode << 1) | get_cabac_bypass(&lc->cc);
+
+    return mode;
+}
+
+/**
+ * Read the intra chroma prediction mode: a context bin and, if it is set,
+ * two bypass bins (most significant first).
+ *
+ * @return 4 when the context bin is clear, otherwise the 2-bit value
+ */
+int ff_hevc_intra_chroma_pred_mode_decode(HEVCLocalContext *lc)
+{
+    int high, low;
+
+    if (!GET_CABAC(INTRA_CHROMA_PRED_MODE_OFFSET))
+        return 4;
+
+    high = get_cabac_bypass(&lc->cc);
+    low  = get_cabac_bypass(&lc->cc);
+    return (high << 1) | low;
+}
+
+/**
+ * Read the merge index: one context bin, then, if it is set, bypass bins
+ * that each increment the index until a clear bin is read or the index
+ * reaches max_num_merge_cand - 1.
+ */
+int ff_hevc_merge_idx_decode(HEVCLocalContext *lc)
+{
+    int idx = GET_CABAC(MERGE_IDX_OFFSET);
+
+    if (!idx)
+        return idx;
+
+    for (; idx < lc->parent->sh.max_num_merge_cand-1; idx++) {
+        if (!get_cabac_bypass(&lc->cc))
+            break;
+    }
+    return idx;
+}
+
+/** Read one bin with the context at MERGE_FLAG_OFFSET. */
+int ff_hevc_merge_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(MERGE_FLAG_OFFSET);
+}
+
+/**
+ * Read the inter prediction direction.
+ *
+ * Unless nPbW + nPbH is 12, a first bin (context selected by lc->ct_depth)
+ * is read and PRED_BI is returned when it is set. In every other case the
+ * result is the value of one bin read with context +4.
+ */
+int ff_hevc_inter_pred_idc_decode(HEVCLocalContext *lc, int nPbW, int nPbH)
+{
+    if (nPbW + nPbH != 12 &&
+        GET_CABAC(INTER_PRED_IDC_OFFSET + lc->ct_depth))
+        return PRED_BI;
+
+    return GET_CABAC(INTER_PRED_IDC_OFFSET + 4);
+}
+
+/**
+ * Read a reference index as a truncated unary code with maximum
+ * num_ref_idx_lx - 1. At most the first two bins are context coded (contexts
+ * starting at REF_IDX_L0_OFFSET); once both were set, the remaining bins are
+ * bypass coded.
+ */
+int ff_hevc_ref_idx_lx_decode(HEVCLocalContext *lc, int num_ref_idx_lx)
+{
+    const int last     = num_ref_idx_lx - 1;
+    const int ctx_bins = FFMIN(last, 2);
+    int idx;
+
+    for (idx = 0; idx < ctx_bins; idx++) {
+        if (!GET_CABAC(REF_IDX_L0_OFFSET + idx))
+            return idx;
+    }
+
+    if (idx == 2) {
+        for (; idx < last; idx++) {
+            if (!get_cabac_bypass(&lc->cc))
+                break;
+        }
+    }
+
+    return idx;
+}
+
+/** Read one bin with the context at MVP_LX_FLAG_OFFSET. */
+int ff_hevc_mvp_lx_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(MVP_LX_FLAG_OFFSET);
+}
+
+/** Read one bin with the context at NO_RESIDUAL_DATA_FLAG_OFFSET. */
+int ff_hevc_no_residual_syntax_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(NO_RESIDUAL_DATA_FLAG_OFFSET);
+}
+
+/** Read one bin with the first context of ABS_MVD_GREATER0_FLAG. */
+static av_always_inline int abs_mvd_greater0_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(ABS_MVD_GREATER0_FLAG_OFFSET);
+}
+
+/** Read one bin with the second context (offset + 1) of ABS_MVD_GREATER1_FLAG. */
+static av_always_inline int abs_mvd_greater1_flag_decode(HEVCLocalContext *lc)
+{
+    return GET_CABAC(ABS_MVD_GREATER1_FLAG_OFFSET + 1);
+}
+
+/**
+ * Read a signed value from bypass bins (presumably a motion vector
+ * difference component of magnitude >= 2, given the starting value of 2).
+ *
+ * A unary prefix of set bins raises k from 1 and adds 1 << k per bin, then k
+ * further bins are added in from bit k - 1 down to bit 0, and
+ * get_cabac_bypass_sign() produces the result from the negated magnitude.
+ *
+ * @return the decoded value, or 0 (after logging an error) when k reaches
+ *         CABAC_MAX_BIN
+ */
+static av_always_inline int mvd_decode(HEVCLocalContext *lc)
+{
+    int ret = 2;
+    int k = 1;
+
+    while (k < CABAC_MAX_BIN && get_cabac_bypass(&lc->cc)) {
+        /* unsigned constant keeps the shift well defined for large k */
+        ret += 1U << k;
+        k++;
+    }
+    if (k == CABAC_MAX_BIN) {
+        av_log(lc->logctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
+        return 0;
+    }
+    while (k--)
+        ret += get_cabac_bypass(&lc->cc) << k;
+    return get_cabac_bypass_sign(&lc->cc, -ret);
+}
+
+/** Return the result of get_cabac_bypass_sign() called with the value -1. */
+static av_always_inline int mvd_sign_flag_decode(HEVCLocalContext *lc)
+{
+    return get_cabac_bypass_sign(&lc->cc, -1);
+}
+
+/**
+ * Read one bin with the SPLIT_TRANSFORM_FLAG context at index
+ * 5 - log2_trafo_size.
+ */
+int ff_hevc_split_transform_flag_decode(HEVCLocalContext *lc, int log2_trafo_size)
+{
+    return GET_CABAC(SPLIT_TRANSFORM_FLAG_OFFSET + 5 - log2_trafo_size);
+}
+
+/** Read one bin with the CBF_CB_CR context at index trafo_depth. */
+int ff_hevc_cbf_cb_cr_decode(HEVCLocalContext *lc, int trafo_depth)
+{
+    return GET_CABAC(CBF_CB_CR_OFFSET + trafo_depth);
+}
+
+/**
+ * Read one bin with a CBF_LUMA context: index 1 when trafo_depth is 0,
+ * index 0 otherwise.
+ */
+int ff_hevc_cbf_luma_decode(HEVCLocalContext *lc, int trafo_depth)
+{
+    return GET_CABAC(CBF_LUMA_OFFSET + !trafo_depth);
+}
+
+/**
+ * Read one bin with a TRANSFORM_SKIP_FLAG context: index 0 for c_idx 0,
+ * index 1 for any other c_idx.
+ */
+static int hevc_transform_skip_flag_decode(HEVCLocalContext *lc, int c_idx)
+{
+    return GET_CABAC(TRANSFORM_SKIP_FLAG_OFFSET + !!c_idx);
+}
+
+/**
+ * Read one bin with an EXPLICIT_RDPCM_FLAG context: index 0 for c_idx 0,
+ * index 1 for any other c_idx.
+ */
+static int explicit_rdpcm_flag_decode(HEVCLocalContext *lc, int c_idx)
+{
+    return GET_CABAC(EXPLICIT_RDPCM_FLAG_OFFSET + !!c_idx);
+}
+
+/**
+ * Read one bin with an EXPLICIT_RDPCM_DIR_FLAG context: index 0 for c_idx 0,
+ * index 1 for any other c_idx.
+ */
+static int explicit_rdpcm_dir_flag_decode(HEVCLocalContext *lc, int c_idx)
+{
+    return GET_CABAC(EXPLICIT_RDPCM_DIR_FLAG_OFFSET + !!c_idx);
+}
+
+/**
+ * Count set context bins, at most four; bin n uses the LOG2_RES_SCALE_ABS
+ * context at index 4 * idx + n.
+ *
+ * @return a value in [0, 4]
+ */
+int ff_hevc_log2_res_scale_abs(HEVCLocalContext *lc, int idx)
+{
+    const int ctx_base = LOG2_RES_SCALE_ABS_OFFSET + 4 * idx;
+    int n;
+
+    for (n = 0; n < 4; n++) {
+        if (!GET_CABAC(ctx_base + n))
+            break;
+    }
+
+    return n;
+}
+
+/** Read one bin with the RES_SCALE_SIGN_FLAG context at index idx. */
+int ff_hevc_res_scale_sign_flag(HEVCLocalContext *lc, int idx)
+{
+    return GET_CABAC(RES_SCALE_SIGN_FLAG_OFFSET + idx);
+}
+
+/**
+ * Read the X and then the Y prefix of the last significant coefficient
+ * position. Each is a count of set context bins limited to
+ * 2 * log2_size - 1; bin n uses context (n >> ctx_shift) + ctx_offset, where
+ * both parameters depend on log2_size and on whether c_idx is 0.
+ *
+ * @param[out] last_scx_prefix  receives the X prefix
+ * @param[out] last_scy_prefix  receives the Y prefix
+ */
+static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCLocalContext *lc, int c_idx,
+                                                   int log2_size, int *last_scx_prefix, int *last_scy_prefix)
+{
+    const int max_bins = (log2_size << 1) - 1;
+    int ctx_offset, ctx_shift;
+    int n;
+
+    if (c_idx) {
+        ctx_offset = 15;
+        ctx_shift  = log2_size - 2;
+    } else {
+        ctx_offset = 3 * (log2_size - 2)  + ((log2_size - 1) >> 2);
+        ctx_shift  = (log2_size + 1) >> 2;
+    }
+
+    for (n = 0; n < max_bins; n++) {
+        if (!GET_CABAC(LAST_SIGNIFICANT_COEFF_X_PREFIX_OFFSET + (n >> ctx_shift) + ctx_offset))
+            break;
+    }
+    *last_scx_prefix = n;
+
+    for (n = 0; n < max_bins; n++) {
+        if (!GET_CABAC(LAST_SIGNIFICANT_COEFF_Y_PREFIX_OFFSET + (n >> ctx_shift) + ctx_offset))
+            break;
+    }
+    *last_scy_prefix = n;
+}
+
+/**
+ * Read the bypass-coded suffix that belongs to a last-significant-coefficient
+ * prefix. One bin is always read; the total is
+ * (last_significant_coeff_prefix >> 1) - 1 bins whenever that is more than
+ * one. Bins are assembled most significant first.
+ */
+static av_always_inline int last_significant_coeff_suffix_decode(HEVCLocalContext *lc,
+                                                 int last_significant_coeff_prefix)
+{
+    int extra_bins = (last_significant_coeff_prefix >> 1) - 2;
+    int suffix = get_cabac_bypass(&lc->cc);
+
+    while (extra_bins-- > 0)
+        suffix = (suffix << 1) | get_cabac_bypass(&lc->cc);
+
+    return suffix;
+}
+
+/**
+ * Read one bin with a SIGNIFICANT_COEFF_GROUP_FLAG context; the index is
+ * min(ctx_cg, 1), plus 2 when c_idx is greater than 0.
+ */
+static av_always_inline int significant_coeff_group_flag_decode(HEVCLocalContext *lc, int c_idx, int ctx_cg)
+{
+    int ctx_inc = FFMIN(ctx_cg, 1);
+
+    if (c_idx > 0)
+        ctx_inc += 2;
+
+    return GET_CABAC(SIGNIFICANT_COEFF_GROUP_FLAG_OFFSET + ctx_inc);
+}
+/**
+ * Read one bin with a SIGNIFICANT_COEFF_FLAG context; the index is
+ * ctx_idx_map[4 * y_c + x_c] + offset.
+ */
+static av_always_inline int significant_coeff_flag_decode(HEVCLocalContext *lc, int x_c, int y_c,
+                                           int offset, const uint8_t *ctx_idx_map)
+{
+    const int map_pos = (y_c << 2) + x_c;
+    const int ctx_inc = ctx_idx_map[map_pos] + offset;
+
+    return GET_CABAC(SIGNIFICANT_COEFF_FLAG_OFFSET + ctx_inc);
+}
+
+/**
+ * Read one bin with the SIGNIFICANT_COEFF_FLAG context at index offset.
+ * The c_idx parameter is not used by this function.
+ */
+static av_always_inline int significant_coeff_flag_decode_0(HEVCLocalContext *lc, int c_idx, int offset)
+{
+    return GET_CABAC(SIGNIFICANT_COEFF_FLAG_OFFSET + offset);
+}
+
+/**
+ * Read one bin with a COEFF_ABS_LEVEL_GREATER1_FLAG context; the index is
+ * inc, plus 16 when c_idx is greater than 0.
+ */
+static av_always_inline int coeff_abs_level_greater1_flag_decode(HEVCLocalContext *lc, int c_idx, int inc)
+{
+    const int ctx_inc = c_idx > 0 ? inc + 16 : inc;
+
+    return GET_CABAC(COEFF_ABS_LEVEL_GREATER1_FLAG_OFFSET + ctx_inc);
+}
+
+/**
+ * Read one bin with a COEFF_ABS_LEVEL_GREATER2_FLAG context; the index is
+ * inc, plus 4 when c_idx is greater than 0.
+ */
+static av_always_inline int coeff_abs_level_greater2_flag_decode(HEVCLocalContext *lc, int c_idx, int inc)
+{
+    const int ctx_inc = c_idx > 0 ? inc + 4 : inc;
+
+    return GET_CABAC(COEFF_ABS_LEVEL_GREATER2_FLAG_OFFSET + ctx_inc);
+}
+
+/**
+ * Read a remaining absolute coefficient level from bypass bins.
+ *
+ * A unary prefix is read first. For a prefix below 3 the suffix has
+ * rc_rice_param bins and the result is (prefix << rc_rice_param) + suffix.
+ * Otherwise the suffix has prefix - 3 + rc_rice_param bins and the result is
+ * (((1 << (prefix - 3)) + 2) << rc_rice_param) + suffix.
+ *
+ * @return the decoded value, or 0 (after logging an error) when the prefix
+ *         reaches CABAC_MAX_BIN or the long suffix would exceed 22 bins
+ */
+static av_always_inline int coeff_abs_level_remaining_decode(HEVCLocalContext *lc, int rc_rice_param)
+{
+    int prefix = 0;
+    int suffix = 0;
+    int suffix_bins;
+
+    while (prefix < CABAC_MAX_BIN && get_cabac_bypass(&lc->cc))
+        prefix++;
+
+    if (prefix < 3) {
+        for (suffix_bins = rc_rice_param; suffix_bins > 0; suffix_bins--)
+            suffix = (suffix << 1) | get_cabac_bypass(&lc->cc);
+        return (prefix << rc_rice_param) + suffix;
+    }
+
+    suffix_bins = prefix - 3 + rc_rice_param;
+    if (prefix == CABAC_MAX_BIN || suffix_bins > 16 + 6) {
+        av_log(lc->logctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", prefix);
+        return 0;
+    }
+
+    for (; suffix_bins > 0; suffix_bins--)
+        suffix = (suffix << 1) | get_cabac_bypass(&lc->cc);
+
+    return (((1 << (prefix - 3)) + 3 - 1) << rc_rice_param) + suffix;
+}
+
+/**
+ * Read nb bypass bins and return them packed into one integer, the first
+ * bin read ending up in the most significant of the nb bits.
+ */
+static av_always_inline int coeff_sign_flag_decode(HEVCLocalContext *lc, uint8_t nb)
+{
+    int flags = 0;
+    int bins_left = nb;
+
+    while (bins_left-- > 0)
+        flags = (flags << 1) | get_cabac_bypass(&lc->cc);
+
+    return flags;
+}
+/* Decode the residual of one transform block of plane c_idx via the lc->cc decoders, scale/transform it and add it to the current frame. */
+void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0,
+                                int log2_trafo_size, enum ScanType scan_idx,
+                                int c_idx)
+{
+#define GET_COORD(offset, n)                                    \
+    do {                                                        \
+        x_c = (x_cg << 2) + scan_x_off[n];                      \
+        y_c = (y_cg << 2) + scan_y_off[n];                      \
+    } while (0) /* NOTE(review): the 'offset' macro argument is never used in the expansion */
+    const HEVCContext *const s = lc->parent;
+    int transform_skip_flag = 0;
+    /* Last significant coefficient position: first holds the decoded prefix, then is refined and possibly swapped below. */
+    int last_significant_coeff_x, last_significant_coeff_y;
+    int last_scan_pos;
+    int n_end;
+    int num_coeff = 0;
+    int greater1_ctx = 1;
+
+    int num_last_subset;
+    int x_cg_last_sig, y_cg_last_sig;
+
+    const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
+    /* dst: top-left sample of this block in plane c_idx; x0/y0 are scaled down by the plane's hshift/vshift. */
+    ptrdiff_t stride = s->cur_frame->f->linesize[c_idx];
+    int hshift = s->ps.sps->hshift[c_idx];
+    int vshift = s->ps.sps->vshift[c_idx];
+    uint8_t *dst = &s->cur_frame->f->data[c_idx][(y0 >> vshift) * stride +
+                                          ((x0 >> hshift) << s->ps.sps->pixel_shift)];
+    int16_t *coeffs = (int16_t*)(c_idx ? lc->edge_emu_buffer2 : lc->edge_emu_buffer); /* coefficient scratch buffer */
+    uint8_t significant_coeff_group_flag[8][8] = {{0}}; /* indexed [x_cg][y_cg] */
+    int explicit_rdpcm_flag = 0;
+    int explicit_rdpcm_dir_flag; /* only assigned when explicit_rdpcm_flag is decoded as non-zero */
+
+    int trafo_size = 1 << log2_trafo_size;
+    int i;
+    int qp,shift,add,scale,scale_m;
+    static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
+    const uint8_t *scale_matrix = NULL;
+    uint8_t dc_scale;
+    int pred_mode_intra = (c_idx == 0) ? lc->tu.intra_pred_mode :
+                                         lc->tu.intra_pred_mode_c;
+    /* Start from an all-zero coefficient block; only significant positions are written below. */
+    memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t));
+
+    // Derive QP for dequant
+    if (!lc->cu.cu_transquant_bypass_flag) {
+        static const int qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 }; /* indexed by qp_i - 30, qp_i in 30..43 */
+        static const uint8_t rem6[51 + 4 * 6 + 1] = { /* rem6[q] == q % 6 */
+            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
+            3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+            4, 5, 0, 1, 2, 3, 4, 5, 0, 1
+        };
+
+        static const uint8_t div6[51 + 4 * 6 + 1] = { /* div6[q] == q / 6 */
+            0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
+            3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
+            7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
+            10, 10, 11, 11, 11, 11, 11, 11, 12, 12
+        };
+        int qp_y = lc->qp_y;
+
+        if (s->ps.pps->transform_skip_enabled_flag &&
+            log2_trafo_size <= s->ps.pps->log2_max_transform_skip_block_size) {
+            transform_skip_flag = hevc_transform_skip_flag_decode(lc, c_idx);
+        }
+        /* c_idx == 0 uses qp_y directly; other planes add the PPS/slice/CU offsets and, for chroma_format_idc == 1, map through qp_c. */
+        if (c_idx == 0) {
+            qp = qp_y + s->ps.sps->qp_bd_offset;
+        } else {
+            int qp_i, offset;
+
+            if (c_idx == 1)
+                offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset +
+                         lc->tu.cu_qp_offset_cb;
+            else
+                offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset +
+                         lc->tu.cu_qp_offset_cr;
+
+            qp_i = av_clip(qp_y + offset, - s->ps.sps->qp_bd_offset, 57);
+            if (s->ps.sps->chroma_format_idc == 1) {
+                if (qp_i < 30)
+                    qp = qp_i;
+                else if (qp_i > 43)
+                    qp = qp_i - 6;
+                else
+                    qp = qp_c[qp_i - 30];
+            } else {
+                if (qp_i > 51)
+                    qp = 51;
+                else
+                    qp = qp_i;
+            }
+
+            qp += s->ps.sps->qp_bd_offset;
+        }
+        /* Dequantisation parameters: each level is multiplied by scale and scale_m, rounded with 'add' and shifted right by 'shift'. */
+        shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
+        add      = 1 << (shift-1);
+        scale    = level_scale[rem6[qp]] << (div6[qp]);
+        scale_m  = 16; // default when no custom scaling lists.
+        dc_scale = 16;
+        /* Scaling lists (PPS list if present, else SPS list) are not used for transform-skipped blocks larger than 4x4. */
+        if (s->ps.sps->scaling_list_enabled && !(transform_skip_flag && log2_trafo_size > 2)) {
+            const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
+            &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
+            int matrix_id = lc->cu.pred_mode != MODE_INTRA;
+
+            matrix_id = 3 * matrix_id + c_idx; /* 0..2 for intra CUs, 3..5 otherwise */
+
+            scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
+            if (log2_trafo_size >= 4)
+                dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
+        }
+    } else { /* transquant bypass: the scaling step further down is skipped entirely */
+        shift        = 0;
+        add          = 0;
+        scale        = 0;
+        dc_scale     = 0;
+    }
+    /* The explicit RDPCM flags are read only for MODE_INTER CUs that use transform skip or transquant bypass. */
+    if (lc->cu.pred_mode == MODE_INTER && s->ps.sps->explicit_rdpcm_enabled &&
+        (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
+        explicit_rdpcm_flag = explicit_rdpcm_flag_decode(lc, c_idx);
+        if (explicit_rdpcm_flag) {
+            explicit_rdpcm_dir_flag = explicit_rdpcm_dir_flag_decode(lc, c_idx);
+        }
+    }
+    /* Last significant coefficient: both prefixes first, then a suffix for each prefix value above 3. */
+    last_significant_coeff_xy_prefix_decode(lc, c_idx, log2_trafo_size,
+                                           &last_significant_coeff_x, &last_significant_coeff_y);
+
+    if (last_significant_coeff_x > 3) {
+        int suffix = last_significant_coeff_suffix_decode(lc, last_significant_coeff_x);
+        last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
+        (2 + (last_significant_coeff_x & 1)) +
+        suffix;
+    }
+
+    if (last_significant_coeff_y > 3) {
+        int suffix = last_significant_coeff_suffix_decode(lc, last_significant_coeff_y);
+        last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
+        (2 + (last_significant_coeff_y & 1)) +
+        suffix;
+    }
+    /* For SCAN_VERT the decoded x and y are exchanged. */
+    if (scan_idx == SCAN_VERT)
+        FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
+    /* Coordinates of the 4x4 coefficient group that contains the last significant coefficient. */
+    x_cg_last_sig = last_significant_coeff_x >> 2;
+    y_cg_last_sig = last_significant_coeff_y >> 2;
+    /* Pick the scan tables and set num_coeff to the scan index of the last significant coefficient (turned into a count by the ++ below). */
+    switch (scan_idx) {
+    case SCAN_DIAG: {
+        int last_x_c = last_significant_coeff_x & 3;
+        int last_y_c = last_significant_coeff_y & 3;
+
+        scan_x_off = ff_hevc_diag_scan4x4_x;
+        scan_y_off = ff_hevc_diag_scan4x4_y;
+        num_coeff = diag_scan4x4_inv[last_y_c][last_x_c];
+        if (trafo_size == 4) {
+            scan_x_cg = scan_1x1;
+            scan_y_cg = scan_1x1;
+        } else if (trafo_size == 8) {
+            num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
+            scan_x_cg = diag_scan2x2_x;
+            scan_y_cg = diag_scan2x2_y;
+        } else if (trafo_size == 16) {
+            num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
+            scan_x_cg = ff_hevc_diag_scan4x4_x;
+            scan_y_cg = ff_hevc_diag_scan4x4_y;
+        } else { // trafo_size == 32
+            num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
+            scan_x_cg = ff_hevc_diag_scan8x8_x;
+            scan_y_cg = ff_hevc_diag_scan8x8_y;
+        }
+        break;
+    }
+    case SCAN_HORIZ: /* NOTE(review): only 2x2 group tables and an 8x8 inverse table are used here; presumably callers pick this scan only for blocks up to 8x8 - confirm */
+        scan_x_cg = horiz_scan2x2_x;
+        scan_y_cg = horiz_scan2x2_y;
+        scan_x_off = horiz_scan4x4_x;
+        scan_y_off = horiz_scan4x4_y;
+        num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
+        break;
+    default: //SCAN_VERT
+        scan_x_cg = horiz_scan2x2_y;
+        scan_y_cg = horiz_scan2x2_x;
+        scan_x_off = horiz_scan4x4_y;
+        scan_y_off = horiz_scan4x4_x;
+        num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
+        break;
+    }
+    num_coeff++;
+    num_last_subset = (num_coeff - 1) >> 4;
+    /* Walk the coefficient groups in reverse scan order, starting with the one holding the last significant coefficient. */
+    for (i = num_last_subset; i >= 0; i--) {
+        int n, m;
+        int x_cg, y_cg, x_c, y_c, pos;
+        int implicit_non_zero_coeff = 0;
+        int64_t trans_coeff_level;
+        int prev_sig = 0;
+        int offset = i << 4;
+        int rice_init = 0;
+
+        uint8_t significant_coeff_flag_idx[16];
+        uint8_t nb_significant_coeff_flag = 0;
+
+        x_cg = scan_x_cg[i];
+        y_cg = scan_y_cg[i];
+        /* A group flag is decoded only for 0 < i < num_last_subset; for the first and last group it is derived from the position tests below. */
+        if ((i < num_last_subset) && (i > 0)) {
+            int ctx_cg = 0;
+            if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
+                ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
+            if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
+                ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
+
+            significant_coeff_group_flag[x_cg][y_cg] =
+                significant_coeff_group_flag_decode(lc, c_idx, ctx_cg);
+            implicit_non_zero_coeff = 1; /* if no flag at n > 0 turns out set, position 0 is taken as significant without decoding */
+        } else {
+            significant_coeff_group_flag[x_cg][y_cg] =
+            ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
+             (x_cg == 0 && y_cg == 0));
+        }
+
+        last_scan_pos = num_coeff - offset - 1;
+        /* In the last group the last significant coefficient is recorded as significant without decoding a flag for it. */
+        if (i == num_last_subset) {
+            n_end = last_scan_pos - 1;
+            significant_coeff_flag_idx[0] = last_scan_pos;
+            nb_significant_coeff_flag = 1;
+        } else {
+            n_end = 15;
+        }
+        /* prev_sig: bit 0 is set if the [x_cg + 1] neighbour group is significant, bit 1 if the [y_cg + 1] neighbour is. */
+        if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
+            prev_sig = !!significant_coeff_group_flag[x_cg + 1][y_cg];
+        if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
+            prev_sig += (!!significant_coeff_group_flag[x_cg][y_cg + 1] << 1);
+
+        if (significant_coeff_group_flag[x_cg][y_cg] && n_end >= 0) {
+            static const uint8_t ctx_idx_map[] = {
+                0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8, // log2_trafo_size == 2
+                1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 0
+                2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 1
+                2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, // prev_sig == 2
+                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2  // default
+            };
+            const uint8_t *ctx_idx_map_p;
+            int scf_offset = 0;
+            if (s->ps.sps->transform_skip_context_enabled &&
+                (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
+                ctx_idx_map_p = &ctx_idx_map[4 * 16];
+                if (c_idx == 0) {
+                    scf_offset = 40;
+                } else {
+                    scf_offset = 14 + 27;
+                }
+            } else {
+                if (c_idx != 0)
+                    scf_offset = 27;
+                if (log2_trafo_size == 2) {
+                    ctx_idx_map_p = &ctx_idx_map[0];
+                } else {
+                    ctx_idx_map_p = &ctx_idx_map[(prev_sig + 1) << 4];
+                    if (c_idx == 0) {
+                        if ((x_cg > 0 || y_cg > 0))
+                            scf_offset += 3;
+                        if (log2_trafo_size == 3) {
+                            scf_offset += (scan_idx == SCAN_DIAG) ? 9 : 15;
+                        } else {
+                            scf_offset += 21;
+                        }
+                    } else {
+                        if (log2_trafo_size == 3)
+                            scf_offset += 9;
+                        else
+                            scf_offset += 12;
+                    }
+                }
+            }
+            for (n = n_end; n > 0; n--) { /* positions n_end..1; position 0 is handled separately below */
+                x_c = scan_x_off[n];
+                y_c = scan_y_off[n];
+                if (significant_coeff_flag_decode(lc, x_c, y_c, scf_offset, ctx_idx_map_p)) {
+                    significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
+                    nb_significant_coeff_flag++;
+                    implicit_non_zero_coeff = 0;
+                }
+            }
+            if (implicit_non_zero_coeff == 0) {
+                if (s->ps.sps->transform_skip_context_enabled &&
+                    (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
+                    if (c_idx == 0) {
+                        scf_offset = 42;
+                    } else {
+                        scf_offset = 16 + 27;
+                    }
+                } else {
+                    if (i == 0) {
+                        if (c_idx == 0)
+                            scf_offset = 0;
+                        else
+                            scf_offset = 27;
+                    } else {
+                        scf_offset = 2 + scf_offset;
+                    }
+                }
+                if (significant_coeff_flag_decode_0(lc, c_idx, scf_offset) == 1) {
+                    significant_coeff_flag_idx[nb_significant_coeff_flag] = 0;
+                    nb_significant_coeff_flag++;
+                }
+            } else {
+                significant_coeff_flag_idx[nb_significant_coeff_flag] = 0;
+                nb_significant_coeff_flag++;
+            }
+        }
+
+        n_end = nb_significant_coeff_flag; /* n_end is reused as the number of significant coefficients in this group */
+
+
+        if (n_end) {
+            int first_nz_pos_in_cg;
+            int last_nz_pos_in_cg;
+            int c_rice_param = 0;
+            int first_greater1_coeff_idx = -1;
+            uint8_t coeff_abs_level_greater1_flag[8];
+            uint16_t coeff_sign_flag;
+            int sum_abs = 0;
+            int sign_hidden;
+            int sb_type;
+
+
+            // base context set for coeff_abs_level_greater1_flag: 2 when i > 0 and c_idx == 0, otherwise 0
+            int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
+            /* With persistent Rice adaptation the initial Rice parameter is stat_coeff[sb_type] / 4. */
+            if (s->ps.sps->persistent_rice_adaptation_enabled) {
+                if (!transform_skip_flag && !lc->cu.cu_transquant_bypass_flag)
+                    sb_type = 2 * (c_idx == 0 ? 1 : 0);
+                else
+                    sb_type = 2 * (c_idx == 0 ? 1 : 0) + 1;
+                c_rice_param = lc->stat_coeff[sb_type] / 4;
+            }
+            /* greater1_ctx still holds the value left by the previously processed group; 0 there bumps the context set. */
+            if (!(i == num_last_subset) && greater1_ctx == 0)
+                ctx_set++;
+            greater1_ctx = 1;
+            last_nz_pos_in_cg = significant_coeff_flag_idx[0];
+            /* At most 8 greater1 flags are decoded per group. */
+            for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
+                int inc = (ctx_set << 2) + greater1_ctx;
+                coeff_abs_level_greater1_flag[m] =
+                    coeff_abs_level_greater1_flag_decode(lc, c_idx, inc);
+                if (coeff_abs_level_greater1_flag[m]) {
+                    greater1_ctx = 0;
+                    if (first_greater1_coeff_idx == -1)
+                        first_greater1_coeff_idx = m;
+                } else if (greater1_ctx > 0 && greater1_ctx < 3) {
+                    greater1_ctx++;
+                }
+            }
+            first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
+            /* No sign hiding for transquant bypass, RDPCM-coded blocks; otherwise first and last significant positions must be >= 4 apart. */
+            if (lc->cu.cu_transquant_bypass_flag ||
+                (lc->cu.pred_mode ==  MODE_INTRA  &&
+                 s->ps.sps->implicit_rdpcm_enabled  &&  transform_skip_flag  &&
+                 (pred_mode_intra == 10 || pred_mode_intra  ==  26 )) ||
+                 explicit_rdpcm_flag)
+                sign_hidden = 0;
+            else
+                sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4);
+            /* Only the first coefficient with greater1 set gets a greater2 flag; it is added in place, giving 1 or 2. */
+            if (first_greater1_coeff_idx != -1) {
+                coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += coeff_abs_level_greater2_flag_decode(lc, c_idx, ctx_set);
+            }
+            if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden ) { /* sign bits are left-aligned in 16 bits; one fewer is read when a sign is hidden */
+                coeff_sign_flag = coeff_sign_flag_decode(lc, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
+            } else {
+                coeff_sign_flag = coeff_sign_flag_decode(lc, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
+            }
+            /* Rebuild each significant coefficient: magnitude, sign, then (unless bypassed) scaling and clipping. */
+            for (m = 0; m < n_end; m++) {
+                n = significant_coeff_flag_idx[m];
+                GET_COORD(offset, n);
+                if (m < 8) {
+                    trans_coeff_level = 1 + coeff_abs_level_greater1_flag[m];
+                    if (trans_coeff_level == ((m == first_greater1_coeff_idx) ? 3 : 2)) { /* flags are saturated: a remainder follows */
+                        int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(lc, c_rice_param);
+
+                        trans_coeff_level += last_coeff_abs_level_remaining;
+                        if (trans_coeff_level > (3 << c_rice_param)) /* grow the Rice parameter; capped at 4 unless persistent adaptation is on */
+                            c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
+                        if (s->ps.sps->persistent_rice_adaptation_enabled && !rice_init) { /* statistics updated once per group */
+                            int c_rice_p_init = lc->stat_coeff[sb_type] / 4;
+                            if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init))
+                                lc->stat_coeff[sb_type]++;
+                            else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init))
+                                if (lc->stat_coeff[sb_type] > 0)
+                                    lc->stat_coeff[sb_type]--;
+                            rice_init = 1;
+                        }
+                    }
+                } else { /* past the 8 flagged coefficients: base level 1 plus a remainder */
+                    int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(lc, c_rice_param);
+
+                    trans_coeff_level = 1 + last_coeff_abs_level_remaining;
+                    if (trans_coeff_level > (3 << c_rice_param))
+                        c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
+                    if (s->ps.sps->persistent_rice_adaptation_enabled && !rice_init) {
+                        int c_rice_p_init = lc->stat_coeff[sb_type] / 4;
+                        if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init))
+                            lc->stat_coeff[sb_type]++;
+                        else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init))
+                            if (lc->stat_coeff[sb_type] > 0)
+                                lc->stat_coeff[sb_type]--;
+                        rice_init = 1;
+                    }
+                }
+                if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
+                    sum_abs += trans_coeff_level;
+                    if (n == first_nz_pos_in_cg && (sum_abs&1)) /* hidden sign: odd sum of magnitudes means negative */
+                        trans_coeff_level = -trans_coeff_level;
+                }
+                if (coeff_sign_flag >> 15) /* sign bits are consumed from bit 15 downwards */
+                    trans_coeff_level = -trans_coeff_level;
+                coeff_sign_flag <<= 1;
+                if(!lc->cu.cu_transquant_bypass_flag) {
+                    if (s->ps.sps->scaling_list_enabled && !(transform_skip_flag && log2_trafo_size > 2)) {
+                        if(y_c || x_c || log2_trafo_size < 4) { /* position (0,0) of 16x16/32x32 blocks uses dc_scale instead */
+                            switch(log2_trafo_size) {
+                                case 3: pos = (y_c << 3) + x_c; break;
+                                case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
+                                case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
+                                default: pos = (y_c << 2) + x_c; break;
+                            }
+                            scale_m = scale_matrix[pos];
+                        } else {
+                            scale_m = dc_scale;
+                        }
+                    }
+                    trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
+                    if(trans_coeff_level < 0) {
+                        if((~trans_coeff_level) & 0xFffffffffff8000) /* NOTE(review): 15 hex digits here vs 16 in the positive branch, so bits 60..63 are not tested - confirm intent */
+                            trans_coeff_level = -32768;
+                    } else {
+                        if(trans_coeff_level & 0xffffffffffff8000)
+                            trans_coeff_level = 32767;
+                    }
+                }
+                coeffs[y_c * trafo_size + x_c] = trans_coeff_level; /* implicit conversion from int64_t to int16_t */
+            }
+        }
+    }
+    /* Final stage: RDPCM for bypass blocks, dequant (+ RDPCM) for transform skip, otherwise one of the hevcdsp transform routines. */
+    if (lc->cu.cu_transquant_bypass_flag) {
+        if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled &&
+                                    (pred_mode_intra == 10 || pred_mode_intra == 26))) {
+            int mode = s->ps.sps->implicit_rdpcm_enabled ? (pred_mode_intra == 26) : explicit_rdpcm_dir_flag;
+            /* NOTE(review): unlike the transform-skip path below, this neither checks pred_mode == MODE_INTRA nor prefers explicit_rdpcm_flag for 'mode' - confirm */
+            s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
+        }
+    } else {
+        if (transform_skip_flag) {
+            int rot = s->ps.sps->transform_skip_rotation_enabled &&
+                      log2_trafo_size == 2 &&
+                      lc->cu.pred_mode == MODE_INTRA;
+            if (rot) {
+                for (i = 0; i < 8; i++) /* reverse the order of the 16 coefficients of the 4x4 block */
+                    FFSWAP(int16_t, coeffs[i], coeffs[16 - i - 1]);
+            }
+
+            s->hevcdsp.dequant(coeffs, log2_trafo_size);
+
+            if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled &&
+                                        lc->cu.pred_mode == MODE_INTRA &&
+                                        (pred_mode_intra == 10 || pred_mode_intra == 26))) {
+                int mode = explicit_rdpcm_flag ? explicit_rdpcm_dir_flag : (pred_mode_intra == 26);
+
+                s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
+            }
+        } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) {
+            s->hevcdsp.transform_4x4_luma(coeffs);
+        } else {
+            int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
+            if (max_xy == 0) /* the last significant coefficient is at (0,0), so only that position can be non-zero */
+                s->hevcdsp.idct_dc[log2_trafo_size - 2](coeffs);
+            else {
+                int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4; /* bound handed to idct, derived from the last significant position */
+                if (max_xy < 4)
+                    col_limit = FFMIN(4, col_limit);
+                else if (max_xy < 8)
+                    col_limit = FFMIN(8, col_limit);
+                else if (max_xy < 12)
+                    col_limit = FFMIN(24, col_limit);
+                s->hevcdsp.idct[log2_trafo_size - 2](coeffs, col_limit);
+            }
+        }
+    }
+    if (lc->tu.cross_pf) {
+        int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
+        /* Add (res_scale_val * value) >> 3 from edge_emu_buffer, the buffer this function uses when c_idx == 0. */
+        for (i = 0; i < (trafo_size * trafo_size); i++) {
+            coeffs[i] = coeffs[i] + ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
+        }
+    }
+    s->hevcdsp.add_residual[log2_trafo_size-2](dst, coeffs, stride); /* add the residual block to the frame at dst */
+}
+
+void ff_hevc_hls_mvd_coding(HEVCLocalContext *lc, int x0, int y0, int log2_cb_size)
+{
+    /* Decode one motion vector difference into lc->pu.mvd; x0, y0 and log2_cb_size are not used. */
+    /* Read order: greater0 for x then y, greater1 for x then y, then the x value, then the y value. */
+    int x_class = abs_mvd_greater0_flag_decode(lc);
+    int y_class = abs_mvd_greater0_flag_decode(lc);
+
+    if (x_class != 0)
+        x_class += abs_mvd_greater1_flag_decode(lc);
+    if (y_class != 0)
+        y_class += abs_mvd_greater1_flag_decode(lc);
+
+    /* Class 2 takes its value from mvd_decode(), class 1 from mvd_sign_flag_decode(), class 0 is zero. */
+    if      (x_class == 2) lc->pu.mvd.x = mvd_decode(lc);
+    else if (x_class == 1) lc->pu.mvd.x = mvd_sign_flag_decode(lc);
+    else if (x_class == 0) lc->pu.mvd.x = 0;
+
+    if      (y_class == 2) lc->pu.mvd.y = mvd_decode(lc);
+    else if (y_class == 1) lc->pu.mvd.y = mvd_sign_flag_decode(lc);
+    else if (y_class == 0) lc->pu.mvd.y = 0;
+    /* Any other class value leaves the corresponding component untouched. */
+}
+
diff --git a/libavcodec/hevc/data.c b/libavcodec/hevc/data.c
new file mode 100644
index 0000000000..8a4f74c3cb
--- /dev/null
+++ b/libavcodec/hevc/data.c
@@ -0,0 +1,75 @@
+/*
+ * HEVC shared tables
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "data.h"
+
+const uint8_t ff_hevc_diag_scan4x4_x[16] = { /* entry i: x of the i-th position of a 4x4 block walked along anti-diagonals (x + y constant, x increasing) */
+    0, 0, 1, 0,
+    1, 2, 0, 1,
+    2, 3, 1, 2,
+    3, 2, 3, 3,
+};
+
+const uint8_t ff_hevc_diag_scan4x4_y[16] = { /* entry i: y of the i-th position; pairs with ff_hevc_diag_scan4x4_x[i] */
+    0, 1, 0, 2,
+    1, 0, 3, 2,
+    1, 0, 3, 2,
+    1, 3, 2, 3,
+};
+
+const uint8_t ff_hevc_diag_scan8x8_x[64] = { /* entry i: x of the i-th position of an 8x8 block walked along anti-diagonals (x + y constant, x increasing) */
+    0, 0, 1, 0,
+    1, 2, 0, 1,
+    2, 3, 0, 1,
+    2, 3, 4, 0,
+    1, 2, 3, 4,
+    5, 0, 1, 2,
+    3, 4, 5, 6,
+    0, 1, 2, 3,
+    4, 5, 6, 7,
+    1, 2, 3, 4,
+    5, 6, 7, 2,
+    3, 4, 5, 6,
+    7, 3, 4, 5,
+    6, 7, 4, 5,
+    6, 7, 5, 6,
+    7, 6, 7, 7,
+};
+
+const uint8_t ff_hevc_diag_scan8x8_y[64] = { /* entry i: y of the i-th position; pairs with ff_hevc_diag_scan8x8_x[i] */
+    0, 1, 0, 2,
+    1, 0, 3, 2,
+    1, 0, 4, 3,
+    2, 1, 0, 5,
+    4, 3, 2, 1,
+    0, 6, 5, 4,
+    3, 2, 1, 0,
+    7, 6, 5, 4,
+    3, 2, 1, 0,
+    7, 6, 5, 4,
+    3, 2, 1, 7,
+    6, 5, 4, 3,
+    2, 7, 6, 5,
+    4, 3, 7, 6,
+    5, 4, 7, 6,
+    5, 7, 6, 7,
+};
diff --git a/libavcodec/hevc/data.h b/libavcodec/hevc/data.h
new file mode 100644
index 0000000000..74558f0a98
--- /dev/null
+++ b/libavcodec/hevc/data.h
@@ -0,0 +1,31 @@
+/*
+ * HEVC shared data tables
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVC_DATA_H
+#define AVCODEC_HEVC_DATA_H
+
+#include <stdint.h>
+
+extern const uint8_t ff_hevc_diag_scan4x4_x[16]; /* 4x4 diagonal scan: x coordinate per scan position (defined in data.c) */
+extern const uint8_t ff_hevc_diag_scan4x4_y[16]; /* 4x4 diagonal scan: y coordinate per scan position */
+extern const uint8_t ff_hevc_diag_scan8x8_x[64]; /* 8x8 diagonal scan: x coordinate per scan position */
+extern const uint8_t ff_hevc_diag_scan8x8_y[64]; /* 8x8 diagonal scan: y coordinate per scan position */
+
+#endif /* AVCODEC_HEVC_DATA_H */
diff --git a/libavcodec/hevc/dsp.c b/libavcodec/hevc/dsp.c
new file mode 100644
index 0000000000..60f059292c
--- /dev/null
+++ b/libavcodec/hevc/dsp.c
@@ -0,0 +1,275 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
+ *
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsp.h"
+
+static const int8_t transform[32][32] = { /* coefficient matrix for the TR_* macros of dsp_template.c: TR_32 reads row j, TR_16 row 2*j, TR_8 row 4*j */
+    { 64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,
+      64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64 },
+    { 90,  90,  88,  85,  82,  78,  73,  67,  61,  54,  46,  38,  31,  22,  13,   4,
+      -4, -13, -22, -31, -38, -46, -54, -61, -67, -73, -78, -82, -85, -88, -90, -90 },
+    { 90,  87,  80,  70,  57,  43,  25,   9,  -9, -25, -43, -57, -70, -80, -87, -90,
+     -90, -87, -80, -70, -57, -43, -25,  -9,   9,  25,  43,  57,  70,  80,  87,  90 },
+    { 90,  82,  67,  46,  22,  -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13,
+      13,  38,  61,  78,  88,  90,  85,  73,  54,  31,   4, -22, -46, -67, -82, -90 },
+    { 89,  75,  50,  18, -18, -50, -75, -89, -89, -75, -50, -18,  18,  50,  75,  89,
+      89,  75,  50,  18, -18, -50, -75, -89, -89, -75, -50, -18,  18,  50,  75,  89 },
+    { 88,  67,  31, -13, -54, -82, -90, -78, -46, -4,   38,  73,  90,  85,  61,  22,
+     -22, -61, -85, -90, -73, -38,   4,  46,  78,  90,  82,  54,  13, -31, -67, -88 },
+    { 87,  57,   9, -43, -80, -90, -70, -25,  25,  70,  90,  80,  43,  -9, -57, -87,
+     -87, -57,  -9,  43,  80,  90,  70,  25, -25, -70, -90, -80, -43,   9,  57,  87 },
+    { 85,  46, -13, -67, -90, -73, -22,  38,  82,  88,  54,  -4, -61, -90, -78, -31,
+      31,  78,  90,  61,   4, -54, -88, -82, -38,  22,  73,  90,  67,  13, -46, -85 },
+    { 83,  36, -36, -83, -83, -36,  36,  83,  83,  36, -36, -83, -83, -36,  36,  83,
+      83,  36, -36, -83, -83, -36,  36,  83,  83,  36, -36, -83, -83, -36,  36,  83 },
+    { 82,  22, -54, -90, -61,  13,  78,  85,  31, -46, -90, -67,   4,  73,  88,  38,
+     -38, -88, -73,  -4,  67,  90,  46, -31, -85, -78, -13,  61,  90,  54, -22, -82 },
+    { 80,   9, -70, -87, -25,  57,  90,  43, -43, -90, -57,  25,  87,  70,  -9, -80,
+     -80,  -9,  70,  87,  25, -57, -90, -43,  43,  90,  57, -25, -87, -70,   9,  80 },
+    { 78,  -4, -82, -73,  13,  85,  67, -22, -88, -61,  31,  90,  54, -38, -90, -46,
+      46,  90,  38, -54, -90, -31,  61,  88,  22, -67, -85, -13,  73,  82,   4, -78 },
+    { 75, -18, -89, -50,  50,  89,  18, -75, -75,  18,  89,  50, -50, -89, -18,  75,
+      75, -18, -89, -50,  50,  89,  18, -75, -75,  18,  89,  50, -50, -89, -18,  75 },
+    { 73, -31, -90, -22,  78,  67, -38, -90, -13,  82,  61, -46, -88,  -4,  85,  54,
+     -54, -85,   4,  88,  46, -61, -82,  13,  90,  38, -67, -78,  22,  90,  31, -73 },
+    { 70, -43, -87,   9,  90,  25, -80, -57,  57,  80, -25, -90,  -9,  87,  43, -70,
+     -70,  43,  87,  -9, -90, -25,  80,  57, -57, -80,  25,  90,   9, -87, -43,  70 },
+    { 67, -54, -78,  38,  85, -22, -90,   4,  90,  13, -88, -31,  82,  46, -73, -61,
+      61,  73, -46, -82,  31,  88, -13, -90,  -4,  90,  22, -85, -38,  78,  54, -67 },
+    { 64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64,
+      64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64 },
+    { 61, -73, -46,  82,  31, -88, -13,  90,  -4, -90,  22,  85, -38, -78,  54,  67,
+     -67, -54,  78,  38, -85, -22,  90,   4, -90,  13,  88, -31, -82,  46,  73, -61 },
+    { 57, -80, -25,  90,  -9, -87,  43,  70, -70, -43,  87,   9, -90,  25,  80, -57,
+     -57,  80,  25, -90,   9,  87, -43, -70,  70,  43, -87,  -9,  90, -25, -80,  57 },
+    { 54, -85,  -4,  88, -46, -61,  82,  13, -90,  38,  67, -78, -22,  90, -31, -73,
+      73,  31, -90,  22,  78, -67, -38,  90, -13, -82,  61,  46, -88,   4,  85, -54 },
+    { 50, -89,  18,  75, -75, -18,  89, -50, -50,  89, -18, -75,  75,  18, -89,  50,
+      50, -89,  18,  75, -75, -18,  89, -50, -50,  89, -18, -75,  75,  18, -89,  50 },
+    { 46, -90,  38,  54, -90,  31,  61, -88,  22,  67, -85,  13,  73, -82,   4,  78,
+     -78,  -4,  82, -73, -13,  85, -67, -22,  88, -61, -31,  90, -54, -38,  90, -46 },
+    { 43, -90,  57,  25, -87,  70,   9, -80,  80,  -9, -70,  87, -25, -57,  90, -43,
+     -43,  90, -57, -25,  87, -70,  -9,  80, -80,   9,  70, -87,  25,  57, -90,  43 },
+    { 38, -88,  73,  -4, -67,  90, -46, -31,  85, -78,  13,  61, -90,  54,  22, -82,
+      82, -22, -54,  90, -61, -13,  78, -85,  31,  46, -90,  67,   4, -73,  88, -38 },
+    { 36, -83,  83, -36, -36,  83, -83,  36,  36, -83,  83, -36, -36,  83, -83,  36,
+      36, -83,  83, -36, -36,  83, -83,  36,  36, -83,  83, -36, -36,  83, -83,  36 },
+    { 31, -78,  90, -61,   4,  54, -88,  82, -38, -22,  73, -90,  67, -13, -46,  85,
+     -85,  46,  13, -67,  90, -73,  22,  38, -82,  88, -54,  -4,  61, -90,  78, -31 },
+    { 25, -70,  90, -80,  43,   9, -57,  87, -87,  57,  -9, -43,  80, -90,  70, -25,
+     -25,  70, -90,  80, -43,  -9,  57, -87,  87, -57,   9,  43, -80,  90, -70,  25 },
+    { 22, -61,  85, -90,  73, -38,  -4,  46, -78,  90, -82,  54, -13, -31,  67, -88,
+      88, -67,  31,  13, -54,  82, -90,  78, -46,   4,  38, -73,  90, -85,  61, -22 },
+    { 18, -50,  75, -89,  89, -75,  50, -18, -18,  50, -75,  89, -89,  75, -50,  18,
+      18, -50,  75, -89,  89, -75,  50, -18, -18,  50, -75,  89, -89,  75, -50,  18 },
+    { 13, -38,  61, -78,  88, -90,  85, -73,  54, -31,   4,  22, -46,  67, -82,  90,
+     -90,  82, -67,  46, -22,  -4,  31, -54,  73, -85,  90, -88,  78, -61,  38, -13 },
+    {  9, -25,  43, -57,  70, -80,  87, -90,  90, -87,  80, -70,  57, -43,  25, -9,
+      -9,  25, -43,  57, -70,  80, -87,  90, -90,  87, -80,  70, -57,  43, -25,   9 },
+    {  4, -13,  22, -31,  38, -46,  54, -61,  67, -73,  78, -82,  85, -88,  90, -90,
+      90, -90,  88, -85,  82, -78,  73, -67,  61, -54,  46, -38,  31, -22,  13,  -4 },
+};
+
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters)[8][4] = { /* 4 coefficients per row; each non-dummy row sums to 64 */
+    {  0 }, /* dummy row, kept so the table can be indexed directly (see dsp.h) */
+    { -2, 58, 10, -2},
+    { -4, 54, 16, -2},
+    { -6, 46, 28, -4},
+    { -4, 36, 36, -4},
+    { -4, 28, 46, -6},
+    { -2, 16, 54, -4},
+    { -2, 10, 58, -2},
+};
+
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[4][16] = { /* each non-dummy row holds its 8 coefficients (sum 64) twice */
+    { 0 }, /* dummy row, kept so the table can be indexed directly (see dsp.h) */
+    { -1,  4,-10, 58, 17, -5,  1,  0, -1,  4,-10, 58, 17, -5,  1,  0},
+    { -1,  4,-11, 40, 40,-11,  4, -1, -1,  4,-11, 40, 40,-11,  4, -1},
+    {  0,  1, -5, 17, 58,-10,  4, -1,  0,  1, -5, 17, 58,-10,  4, -1}
+};
+
+#define BIT_DEPTH 8
+#include "dsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "dsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "dsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 12
+#include "dsp_template.c"
+#undef BIT_DEPTH
+
+void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) /* fill *hevcdsp with the C functions for bit_depth, then run the per-architecture init (if any) on it */
+{
+#undef FUNC
+#define FUNC(a, depth) a ## _ ## depth
+
+#undef PEL_FUNC
+#define PEL_FUNC(dst1, idx1, idx2, a, depth)                                   \
+    for(i = 0 ; i < 10 ; i++)                                                  \
+{                                                                              \
+    hevcdsp->dst1[i][idx1][idx2] = a ## _ ## depth;                            \
+} /* one function is stored in all 10 first-index slots of dst1[][idx1][idx2] */
+
+#undef EPEL_FUNCS
+#define EPEL_FUNCS(depth)                                                     \
+    PEL_FUNC(put_hevc_epel, 0, 0, put_hevc_pel_pixels, depth);                \
+    PEL_FUNC(put_hevc_epel, 0, 1, put_hevc_epel_h, depth);                    \
+    PEL_FUNC(put_hevc_epel, 1, 0, put_hevc_epel_v, depth);                    \
+    PEL_FUNC(put_hevc_epel, 1, 1, put_hevc_epel_hv, depth)
+
+#undef EPEL_UNI_FUNCS
+#define EPEL_UNI_FUNCS(depth)                                                 \
+    PEL_FUNC(put_hevc_epel_uni, 0, 0, put_hevc_pel_uni_pixels, depth);        \
+    PEL_FUNC(put_hevc_epel_uni, 0, 1, put_hevc_epel_uni_h, depth);            \
+    PEL_FUNC(put_hevc_epel_uni, 1, 0, put_hevc_epel_uni_v, depth);            \
+    PEL_FUNC(put_hevc_epel_uni, 1, 1, put_hevc_epel_uni_hv, depth);           \
+    PEL_FUNC(put_hevc_epel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth);    \
+    PEL_FUNC(put_hevc_epel_uni_w, 0, 1, put_hevc_epel_uni_w_h, depth);        \
+    PEL_FUNC(put_hevc_epel_uni_w, 1, 0, put_hevc_epel_uni_w_v, depth);        \
+    PEL_FUNC(put_hevc_epel_uni_w, 1, 1, put_hevc_epel_uni_w_hv, depth)
+
+#undef EPEL_BI_FUNCS
+#define EPEL_BI_FUNCS(depth)                                                \
+    PEL_FUNC(put_hevc_epel_bi, 0, 0, put_hevc_pel_bi_pixels, depth);        \
+    PEL_FUNC(put_hevc_epel_bi, 0, 1, put_hevc_epel_bi_h, depth);            \
+    PEL_FUNC(put_hevc_epel_bi, 1, 0, put_hevc_epel_bi_v, depth);            \
+    PEL_FUNC(put_hevc_epel_bi, 1, 1, put_hevc_epel_bi_hv, depth);           \
+    PEL_FUNC(put_hevc_epel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth);    \
+    PEL_FUNC(put_hevc_epel_bi_w, 0, 1, put_hevc_epel_bi_w_h, depth);        \
+    PEL_FUNC(put_hevc_epel_bi_w, 1, 0, put_hevc_epel_bi_w_v, depth);        \
+    PEL_FUNC(put_hevc_epel_bi_w, 1, 1, put_hevc_epel_bi_w_hv, depth)
+
+#undef QPEL_FUNCS
+#define QPEL_FUNCS(depth)                                                     \
+    PEL_FUNC(put_hevc_qpel, 0, 0, put_hevc_pel_pixels, depth);                \
+    PEL_FUNC(put_hevc_qpel, 0, 1, put_hevc_qpel_h, depth);                    \
+    PEL_FUNC(put_hevc_qpel, 1, 0, put_hevc_qpel_v, depth);                    \
+    PEL_FUNC(put_hevc_qpel, 1, 1, put_hevc_qpel_hv, depth)
+
+#undef QPEL_UNI_FUNCS
+#define QPEL_UNI_FUNCS(depth)                                                 \
+    PEL_FUNC(put_hevc_qpel_uni, 0, 0, put_hevc_pel_uni_pixels, depth);        \
+    PEL_FUNC(put_hevc_qpel_uni, 0, 1, put_hevc_qpel_uni_h, depth);            \
+    PEL_FUNC(put_hevc_qpel_uni, 1, 0, put_hevc_qpel_uni_v, depth);            \
+    PEL_FUNC(put_hevc_qpel_uni, 1, 1, put_hevc_qpel_uni_hv, depth);           \
+    PEL_FUNC(put_hevc_qpel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth);    \
+    PEL_FUNC(put_hevc_qpel_uni_w, 0, 1, put_hevc_qpel_uni_w_h, depth);        \
+    PEL_FUNC(put_hevc_qpel_uni_w, 1, 0, put_hevc_qpel_uni_w_v, depth);        \
+    PEL_FUNC(put_hevc_qpel_uni_w, 1, 1, put_hevc_qpel_uni_w_hv, depth)
+
+#undef QPEL_BI_FUNCS
+#define QPEL_BI_FUNCS(depth)                                                  \
+    PEL_FUNC(put_hevc_qpel_bi, 0, 0, put_hevc_pel_bi_pixels, depth);          \
+    PEL_FUNC(put_hevc_qpel_bi, 0, 1, put_hevc_qpel_bi_h, depth);              \
+    PEL_FUNC(put_hevc_qpel_bi, 1, 0, put_hevc_qpel_bi_v, depth);              \
+    PEL_FUNC(put_hevc_qpel_bi, 1, 1, put_hevc_qpel_bi_hv, depth);             \
+    PEL_FUNC(put_hevc_qpel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth);      \
+    PEL_FUNC(put_hevc_qpel_bi_w, 0, 1, put_hevc_qpel_bi_w_h, depth);          \
+    PEL_FUNC(put_hevc_qpel_bi_w, 1, 0, put_hevc_qpel_bi_w_v, depth);          \
+    PEL_FUNC(put_hevc_qpel_bi_w, 1, 1, put_hevc_qpel_bi_w_hv, depth)
+
+#define HEVC_DSP(depth)                                                     \
+    hevcdsp->put_pcm                = FUNC(put_pcm, depth);                 \
+    hevcdsp->add_residual[0]        = FUNC(add_residual4x4, depth);         \
+    hevcdsp->add_residual[1]        = FUNC(add_residual8x8, depth);         \
+    hevcdsp->add_residual[2]        = FUNC(add_residual16x16, depth);       \
+    hevcdsp->add_residual[3]        = FUNC(add_residual32x32, depth);       \
+    hevcdsp->dequant                = FUNC(dequant, depth);                 \
+    hevcdsp->transform_rdpcm        = FUNC(transform_rdpcm, depth);         \
+    hevcdsp->transform_4x4_luma     = FUNC(transform_4x4_luma, depth);      \
+    hevcdsp->idct[0]                = FUNC(idct_4x4, depth);                \
+    hevcdsp->idct[1]                = FUNC(idct_8x8, depth);                \
+    hevcdsp->idct[2]                = FUNC(idct_16x16, depth);              \
+    hevcdsp->idct[3]                = FUNC(idct_32x32, depth);              \
+                                                                            \
+    hevcdsp->idct_dc[0]             = FUNC(idct_4x4_dc, depth);             \
+    hevcdsp->idct_dc[1]             = FUNC(idct_8x8_dc, depth);             \
+    hevcdsp->idct_dc[2]             = FUNC(idct_16x16_dc, depth);           \
+    hevcdsp->idct_dc[3]             = FUNC(idct_32x32_dc, depth);           \
+                                                                            \
+    hevcdsp->sao_band_filter[0] =                                              \
+    hevcdsp->sao_band_filter[1] =                                              \
+    hevcdsp->sao_band_filter[2] =                                              \
+    hevcdsp->sao_band_filter[3] =                                              \
+    hevcdsp->sao_band_filter[4] = FUNC(sao_band_filter, depth);                \
+    hevcdsp->sao_edge_filter[0] =                                              \
+    hevcdsp->sao_edge_filter[1] =                                              \
+    hevcdsp->sao_edge_filter[2] =                                              \
+    hevcdsp->sao_edge_filter[3] =                                              \
+    hevcdsp->sao_edge_filter[4] = FUNC(sao_edge_filter, depth);                \
+    hevcdsp->sao_edge_restore[0] = FUNC(sao_edge_restore_0, depth);            \
+    hevcdsp->sao_edge_restore[1] = FUNC(sao_edge_restore_1, depth);            \
+                                                                               \
+    QPEL_FUNCS(depth);                                                         \
+    QPEL_UNI_FUNCS(depth);                                                     \
+    QPEL_BI_FUNCS(depth);                                                      \
+    EPEL_FUNCS(depth);                                                         \
+    EPEL_UNI_FUNCS(depth);                                                     \
+    EPEL_BI_FUNCS(depth);                                                      \
+                                                                               \
+    hevcdsp->hevc_h_loop_filter_luma     = FUNC(hevc_h_loop_filter_luma, depth);   \
+    hevcdsp->hevc_v_loop_filter_luma     = FUNC(hevc_v_loop_filter_luma, depth);   \
+    hevcdsp->hevc_h_loop_filter_chroma   = FUNC(hevc_h_loop_filter_chroma, depth); \
+    hevcdsp->hevc_v_loop_filter_chroma   = FUNC(hevc_v_loop_filter_chroma, depth); \
+    hevcdsp->hevc_h_loop_filter_luma_c   = FUNC(hevc_h_loop_filter_luma, depth);   \
+    hevcdsp->hevc_v_loop_filter_luma_c   = FUNC(hevc_v_loop_filter_luma, depth);   \
+    hevcdsp->hevc_h_loop_filter_chroma_c = FUNC(hevc_h_loop_filter_chroma, depth); \
+    hevcdsp->hevc_v_loop_filter_chroma_c = FUNC(hevc_v_loop_filter_chroma, depth)
+int i = 0; /* loop counter used by every PEL_FUNC expansion */
+
+    switch (bit_depth) { /* HEVC_DSP(d) assigns every pointer; the *_c loop-filter slots get the same functions as the plain ones */
+    case 9:
+        HEVC_DSP(9);
+        break;
+    case 10:
+        HEVC_DSP(10);
+        break;
+    case 12:
+        HEVC_DSP(12);
+        break;
+    default: /* any depth other than 9, 10 or 12 gets the 8-bit functions */
+        HEVC_DSP(8);
+        break;
+    }
+
+#if ARCH_AARCH64 /* at most one architecture-specific init runs, on the already filled context */
+    ff_hevc_dsp_init_aarch64(hevcdsp, bit_depth);
+#elif ARCH_ARM
+    ff_hevc_dsp_init_arm(hevcdsp, bit_depth);
+#elif ARCH_PPC
+    ff_hevc_dsp_init_ppc(hevcdsp, bit_depth);
+#elif ARCH_X86
+    ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
+#elif ARCH_MIPS
+    ff_hevc_dsp_init_mips(hevcdsp, bit_depth);
+#elif ARCH_LOONGARCH
+    ff_hevc_dsp_init_loongarch(hevcdsp, bit_depth);
+#endif
+}
diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h
new file mode 100644
index 0000000000..02b8e0e8e2
--- /dev/null
+++ b/libavcodec/hevc/dsp.h
@@ -0,0 +1,140 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
+ *
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVC_DSP_H
+#define AVCODEC_HEVC_DSP_H
+
+#include "libavutil/mem_internal.h"
+
+#include "libavcodec/get_bits.h"
+
+#define MAX_PB_SIZE 64
+
+typedef struct SAOParams { /* every member has a leading dimension of 3; presumably one entry per colour component (c_idx) - confirm */
+    int offset_abs[3][4];   ///< sao_offset_abs
+    int offset_sign[3][4];  ///< sao_offset_sign
+
+    uint8_t band_position[3];   ///< sao_band_position
+
+    int eo_class[3];        ///< sao_eo_class
+
+    int16_t offset_val[3][5];   ///< SaoOffsetVal
+
+    uint8_t type_idx[3];    ///< sao_type_idx
+} SAOParams;
+
+typedef struct HEVCDSPContext { /* table of DSP entry points, filled in by ff_hevc_dsp_init() */
+    void (*put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int width, int height,
+                    struct GetBitContext *gb, int pcm_bit_depth);
+
+    void (*add_residual[4])(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
+
+    void (*dequant)(int16_t *coeffs, int16_t log2_size);
+
+    void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode);
+
+    void (*transform_4x4_luma)(int16_t *coeffs);
+
+    void (*idct[4])(int16_t *coeffs, int col_limit);
+
+    void (*idct_dc[4])(int16_t *coeffs);
+
+    void (*sao_band_filter[5])(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
+                               const int16_t *sao_offset_val, int sao_left_class, int width, int height);
+
+    /* implicit stride_src parameter has value of 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE */
+    void (*sao_edge_filter[5])(uint8_t *_dst /* align 16 */, const uint8_t *_src /* align 32 */, ptrdiff_t stride_dst,
+                               const int16_t *sao_offset_val, int sao_eo_class, int width, int height);
+
+    void (*sao_edge_restore[2])(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
+                                const struct SAOParams *sao, const int *borders, int _width, int _height, int c_idx,
+                                const uint8_t *vert_edge, const uint8_t *horiz_edge, const uint8_t *diag_edge);
+    /* [10][2][2] tables: ff_hevc_dsp_init() fills [0][0] with *_pixels, [0][1] with *_h, [1][0] with *_v, [1][1] with *_hv */
+    void (*put_hevc_qpel[10][2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
+                                    int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_qpel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
+                                        int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_qpel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
+                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
+
+    void (*put_hevc_qpel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride,
+                                       const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                                       int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_qpel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride,
+                                         const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                                         int height, int denom, int wx0, int wx1,
+                                         int ox0, int ox1, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel[10][2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
+                                    int height, intptr_t mx, intptr_t my, int width);
+
+    void (*put_hevc_epel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride,
+                                        int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
+                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride,
+                                       const int16_t *src2,
+                                       int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride,
+                                         const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                                         int height, int denom, int wx0, int wx1,
+                                         int ox0, int ox1, intptr_t mx, intptr_t my, int width); /* same argument order as put_hevc_qpel_bi_w: both tables receive put_hevc_pel_bi_w_pixels */
+
+    void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
+                                    int beta, const int32_t *tc,
+                                    const uint8_t *no_p, const uint8_t *no_q);
+    void (*hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
+                                    int beta, const int32_t *tc,
+                                    const uint8_t *no_p, const uint8_t *no_q);
+    void (*hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
+                                      const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
+    void (*hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
+                                      const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
+    void (*hevc_h_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
+                                      int beta, const int32_t *tc,
+                                      const uint8_t *no_p, const uint8_t *no_q);
+    void (*hevc_v_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
+                                      int beta, const int32_t *tc,
+                                      const uint8_t *no_p, const uint8_t *no_q);
+    void (*hevc_h_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
+                                        const int32_t *tc, const uint8_t *no_p,
+                                        const uint8_t *no_q);
+    void (*hevc_v_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
+                                        const int32_t *tc, const uint8_t *no_p,
+                                        const uint8_t *no_q);
+} HEVCDSPContext;
+
+void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
+
+/** ff_hevc_epel_filters[0] and ff_hevc_qpel_filters[0] are dummies to simplify array addressing */
+extern const int8_t ff_hevc_epel_filters[8][4];
+extern const int8_t ff_hevc_qpel_filters[4][16];
+
+void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_arm(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_loongarch(HEVCDSPContext *c, const int bit_depth);
+
+#endif /* AVCODEC_HEVC_DSP_H */
diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c
new file mode 100644
index 0000000000..aebccd1a0c
--- /dev/null
+++ b/libavcodec/hevc/dsp_template.c
@@ -0,0 +1,934 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "get_bits.h"
+#include "hevcdec.h"
+
+#include "bit_depth_template.c"
+#include "dsp.h"
+#include "h26x/h2656_sao_template.c"
+#include "h26x/h2656_inter_template.c"
+
+static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
+                          GetBitContext *gb, int pcm_bit_depth)
+{
+    /* Read width x height samples of pcm_bit_depth bits from gb, shifting each up to BIT_DEPTH. */
+    const int shift = BIT_DEPTH - pcm_bit_depth;
+    pixel *row = (pixel *)_dst;
+    int col;
+
+    stride /= sizeof(pixel);
+
+    for (; height > 0; height--, row += stride)
+        for (col = 0; col < width; col++)
+            row[col] = get_bits(gb, pcm_bit_depth) << shift;
+}
+
+static av_always_inline void FUNC(add_residual)(uint8_t *_dst, const int16_t *res,
+                                                ptrdiff_t stride, int size)
+{
+    /* Add a size x size block of residuals, read contiguously from res, to
+     * the pixels at _dst; every sum is passed through av_clip_pixel(). */
+    pixel *row = (pixel *)_dst;
+    int r, c;
+
+    stride /= sizeof(pixel);
+
+    for (r = 0; r < size; r++, row += stride) {
+        for (c = 0; c < size; c++)
+            row[c] = av_clip_pixel(row[c] + res[c]);
+        res += size;
+    }
+}
+
+static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res,
+                                  ptrdiff_t stride)
+{
+    FUNC(add_residual)(_dst, res, stride, 4); /* fixed-size 4x4 entry point, stored in hevcdsp->add_residual[0] */
+}
+
+static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res,
+                                  ptrdiff_t stride)
+{
+    FUNC(add_residual)(_dst, res, stride, 8); /* fixed-size 8x8 entry point, stored in hevcdsp->add_residual[1] */
+}
+
+static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res,
+                                    ptrdiff_t stride)
+{
+    FUNC(add_residual)(_dst, res, stride, 16); /* fixed-size 16x16 entry point, stored in hevcdsp->add_residual[2] */
+}
+
+static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res,
+                                    ptrdiff_t stride)
+{
+    FUNC(add_residual)(_dst, res, stride, 32); /* fixed-size 32x32 entry point, stored in hevcdsp->add_residual[3] */
+}
+
+static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
+{
+    /* In-place running sum over a square block of side 1 << log2_size.
+     * mode != 0: every row accumulates the (already updated) row before it.
+     * mode == 0: every element accumulates the one before it in its row. */
+    const int size = 1 << log2_size;
+    int16_t *row   = _coeffs;
+    int r, c;
+
+    if (mode) {
+        for (r = 1; r < size; r++) {
+            row += size;
+            for (c = 0; c < size; c++)
+                row[c] += row[c - size];
+        }
+        return;
+    }
+    for (r = 0; r < size; r++, row += size)
+        for (c = 1; c < size; c++)
+            row[c] += row[c - 1];
+}
+
+static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
+{
+    /* Rescale the whole (1 << log2_size) x (1 << log2_size) block in place.
+     * With shift = 15 - BIT_DEPTH - log2_size: a rounding right shift when
+     * shift is positive, otherwise a left shift by -shift. */
+    const int shift = 15 - BIT_DEPTH - log2_size;
+    const int size  = 1 << log2_size;
+    const int count = size * size;
+    int n;
+
+    if (shift > 0) {
+        const int offset = 1 << (shift - 1);
+
+        for (n = 0; n < count; n++)
+            coeffs[n] = (coeffs[n] + offset) >> shift;
+        return;
+    }
+
+    /* the value is read as uint16_t, so the operand that gets shifted left
+     * is never negative */
+    for (n = 0; n < count; n++)
+        coeffs[n] = ((uint16_t *)coeffs)[n] << -shift;
+}
+
+#define SET(dst, x)   (dst) = (x) /* plain store, no rounding or clipping */
+#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift) /* reads add and shift from the scope it is expanded in */
+
+#define TR_4x4_LUMA(dst, src, step, assign)                             \
+    do {                                                                \
+        int c0 = src[0 * step] + src[2 * step];                         \
+        int c1 = src[2 * step] + src[3 * step];                         \
+        int c2 = src[0 * step] - src[3 * step];                         \
+        int c3 = 74 * src[1 * step];                                    \
+        /* no src read follows a store, so dst may alias src */         \
+        assign(dst[2 * step], 74 * (src[0 * step] -                     \
+                                    src[2 * step] +                     \
+                                    src[3 * step]));                    \
+        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                  \
+        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                  \
+        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                  \
+    } while (0)
+
+static void FUNC(transform_4x4_luma)(int16_t *coeffs)
+{
+    /* Two in-place passes of TR_4x4_LUMA over the 16 coefficients.  SCALE
+     * reads the locals named shift and add, so those names must be kept. */
+    int shift = 7;
+    int add   = 1 << (shift - 1);
+    int16_t *vec;
+
+    /* pass 1: the 4 vectors starting at coeffs[0..3], element stride 4 */
+    for (vec = coeffs; vec < coeffs + 4; vec++)
+        TR_4x4_LUMA(vec, vec, 4, SCALE);
+
+    /* pass 2: the 4 contiguous 4-element vectors, with the rounding shift
+     * that depends on BIT_DEPTH */
+    shift = 20 - BIT_DEPTH;
+    add   = 1 << (shift - 1);
+    for (vec = coeffs; vec < coeffs + 16; vec += 4)
+        TR_4x4_LUMA(vec, vec, 1, SCALE);
+}
+
+#undef TR_4x4_LUMA
+
+#define TR_4(dst, src, dstep, sstep, assign, end)                 \
+    do {                                                          \
+        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
+        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
+        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
+        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
+        /* the end argument is not used by TR_4 */                \
+        assign(dst[0 * dstep], e0 + o0);                          \
+        assign(dst[1 * dstep], e1 + o1);                          \
+        assign(dst[2 * dstep], e1 - o1);                          \
+        assign(dst[3 * dstep], e0 - o0);                          \
+    } while (0)
+
+#define TR_8(dst, src, dstep, sstep, assign, end)                 \
+    do {                                                          \
+        int i, j;                                                 \
+        int e_8[4];                                               \
+        int o_8[4] = { 0 };                                       \
+        for (i = 0; i < 4; i++)                                   \
+            for (j = 1; j < end; j += 2)                          \
+                o_8[i] += transform[4 * j][i] * src[j * sstep];   \
+        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                     \
+        /* e_8: even inputs (TR_4); o_8: odd inputs j < end */    \
+        for (i = 0; i < 4; i++) {                                 \
+            assign(dst[i * dstep], e_8[i] + o_8[i]);              \
+            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);        \
+        }                                                         \
+    } while (0)
+
+#define TR_16(dst, src, dstep, sstep, assign, end)                \
+    do {                                                          \
+        int i, j;                                                 \
+        int e_16[8];                                              \
+        int o_16[8] = { 0 };                                      \
+        for (i = 0; i < 8; i++)                                   \
+            for (j = 1; j < end; j += 2)                          \
+                o_16[i] += transform[2 * j][i] * src[j * sstep];  \
+        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                    \
+        /* e_16: even inputs (TR_8); o_16: odd inputs j < end */  \
+        for (i = 0; i < 8; i++) {                                 \
+            assign(dst[i * dstep], e_16[i] + o_16[i]);            \
+            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);     \
+        }                                                         \
+    } while (0)
+
+#define TR_32(dst, src, dstep, sstep, assign, end)                \
+    do { /* 32-point pass: odd inputs give o_32, even inputs go through TR_16 into e_32 */ \
+        int i, j;                                                 \
+        int e_32[16];          /* even part, written by TR_16 below */ \
+        int o_32[16] = { 0 };  /* odd part, accumulated below */  \
+        for (i = 0; i < 16; i++)                                  \
+            for (j = 1; j < end; j += 2) /* odd j below `end` */  \
+                o_32[i] += transform[j][i] * src[j * sstep];      \
+        TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); /* doubled stride, so the bound is halved */ \
+                                                                  \
+        for (i = 0; i < 16; i++) { /* outputs i and 31 - i share e_32[i] and o_32[i] */ \
+            assign(dst[i * dstep], e_32[i] + o_32[i]);            \
+            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);     \
+        }                                                         \
+    } while (0)
+
+#define IDCT_VAR4(H) /* locals for idct_4x4: only limit2, the bound handed to the first pass */ \
+    int limit2 = FFMIN(col_limit + 4, H)
+#define IDCT_VAR8(H) /* locals for the larger sizes: limit (second pass) and limit2 (first pass) */ \
+    int limit  = FFMIN(col_limit, H);                             \
+    int limit2 = FFMIN(col_limit + 4, H)
+#define IDCT_VAR16(H)   IDCT_VAR8(H) /* same locals as the 8x8 case */
+#define IDCT_VAR32(H)   IDCT_VAR8(H) /* same locals as the 8x8 case */
+
+#define IDCT(H)                                                   \
+static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs,          \
+                                        int col_limit)            \
+{ /* In-place HxH inverse transform: TR_H with element stride H first, then over H contiguous runs. */ \
+    int i;                                                        \
+    int      shift = 7; /* NOTE(review): shift/add are presumably read by SCALE, defined outside this excerpt */ \
+    int      add   = 1 << (shift - 1);                            \
+    int16_t *src   = coeffs;                                      \
+    IDCT_VAR ## H(H); /* declares limit2 (and limit for H > 4) from col_limit */ \
+                                                                  \
+    for (i = 0; i < H; i++) { /* pass 1: start offsets 0..H-1, input and output stride H */ \
+        TR_ ## H(src, src, H, H, SCALE, limit2);                  \
+        if (limit2 < H && i%4 == 0 && !!i) /* at i = 4, 8, ...: lower the bound by 4 while it is below H */ \
+            limit2 -= 4;                                          \
+        src++;                                                    \
+    }                                                             \
+                                                                  \
+    shift = 20 - BIT_DEPTH; /* second pass uses a bit-depth dependent shift */ \
+    add   = 1 << (shift - 1);                                     \
+    for (i = 0; i < H; i++) { /* pass 2: unit stride, H coefficients per iteration */ \
+        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);             \
+        coeffs += H;                                              \
+    }                                                             \
+}
+
+#define IDCT_DC(H)                                                \
+static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)    \
+{                                                                 \
+    /* DC-only block: all H*H entries get one value from coeffs[0] */ \
+    const int shift = 14 - BIT_DEPTH;                             \
+    const int rnd   = 1 << (shift - 1);                           \
+    const int dc    = (((coeffs[0] + 1) >> 1) + rnd) >> shift;    \
+    int16_t  *row   = coeffs;                                     \
+                                                                  \
+    for (int y = 0; y < H; y++, row += H) {                       \
+        for (int x = 0; x < H; x++)                               \
+            row[x] = dc;                                          \
+    }                                                             \
+}
+
+IDCT( 4) /* full inverse transforms for H = 4, 8, 16, 32 */
+IDCT( 8)
+IDCT(16)
+IDCT(32)
+
+IDCT_DC( 4) /* DC-only variants for the same four sizes */
+IDCT_DC( 8)
+IDCT_DC(16)
+IDCT_DC(32)
+
+#undef TR_4 /* the transform building blocks are not needed past this point */
+#undef TR_8
+#undef TR_16
+#undef TR_32
+
+#undef SET /* the two `assign` helpers passed to the TR_* macros */
+#undef SCALE
+
+////////////////////////////////////////////////////////////////////////////////
+// Forwarding wrappers: look up the mx/my filter rows and call the put_* helpers
+////////////////////////////////////////////////////////////////////////////////
+#define ff_hevc_pel_filters ff_hevc_qpel_filters /* lets DECL_HV_FILTER(pel) resolve to the qpel table */
+#define DECL_HV_FILTER(f) /* declares hf / vf as rows mx / my of ff_hevc_<f>_filters */ \
+    const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
+    const uint8_t *vf = ff_hevc_ ## f ## _filters[my]; /* NOTE(review): the bi functions in this file read these tables via const int8_t * -- confirm the signedness the put_* helpers expect */
+
+#define FW_PUT(p, f, t) /* defines put_hevc_<f>: takes filter rows mx/my from table <p>, forwards to put_<t> */ \
+static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height,        \
+                                  intptr_t mx, intptr_t my, int width)                                    \
+{                                                                                                         \
+    DECL_HV_FILTER(p) /* declares hf and vf */                                                            \
+    FUNC(put_ ## t)(dst, src, srcstride, height, hf, vf, width);                                          \
+}
+
+#define FW_PUT_UNI(p, f, t) /* as FW_PUT, but the destination is uint8_t * with its own stride */ \
+static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,                   \
+                                  ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width)   \
+{                                                                                                         \
+    DECL_HV_FILTER(p) /* declares hf and vf from rows mx and my of table <p> */                           \
+    FUNC(put_ ## t)(dst, dststride, src, srcstride, height, hf, vf, width);                           \
+}
+
+#define FW_PUT_UNI_W(p, f, t) /* as FW_PUT_UNI, additionally passing denom, wx and ox through unchanged */ \
+static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,                   \
+                                  ptrdiff_t srcstride,int height, int denom, int wx, int ox,              \
+                                  intptr_t mx, intptr_t my, int width)                                    \
+{                                                                                                         \
+    DECL_HV_FILTER(p) /* declares hf and vf from rows mx and my of table <p> */                           \
+    FUNC(put_ ## t)(dst, dststride, src, srcstride, height, denom, wx, ox, hf, vf, width);            \
+}
+
+#define FW_PUT_FUNCS(f, t, dir) /* three wrappers per direction; table and name prefix <f>, helper family <t> */ \
+    FW_PUT(f, f ## _ ## dir, t ## _ ## dir) /* put_hevc_<f>_<dir> -> put_<t>_<dir> */ \
+    FW_PUT_UNI(f, f ## _uni_ ## dir, uni_ ## t ## _ ## dir) /* put_hevc_<f>_uni_<dir> -> put_uni_<t>_<dir> */ \
+    FW_PUT_UNI_W(f, f ## _uni_w_ ## dir, uni_## t ## _w_ ## dir) /* put_hevc_<f>_uni_w_<dir> -> put_uni_<t>_w_<dir> */
+
+FW_PUT(pel, pel_pixels, pixels) /* put_hevc_pel_pixels -> put_pixels; "pel" maps to the qpel table */
+FW_PUT_UNI(pel, pel_uni_pixels, uni_pixels) /* put_hevc_pel_uni_pixels -> put_uni_pixels */
+FW_PUT_UNI_W(pel, pel_uni_w_pixels, uni_w_pixels) /* put_hevc_pel_uni_w_pixels -> put_uni_w_pixels */
+
+FW_PUT_FUNCS(qpel, luma,   h     ) /* put_hevc_qpel_{h,uni_h,uni_w_h} -> put_luma_h, put_uni_luma_h, put_uni_luma_w_h */
+FW_PUT_FUNCS(qpel, luma,   v     )
+FW_PUT_FUNCS(qpel, luma,   hv    )
+FW_PUT_FUNCS(epel, chroma, h     ) /* put_hevc_epel_{h,uni_h,uni_w_h} -> put_chroma_h, put_uni_chroma_h, put_uni_chroma_w_h */
+FW_PUT_FUNCS(epel, chroma, v     )
+FW_PUT_FUNCS(epel, chroma, hv    )
+
+static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
+                                         const int16_t *src2,
+                                         int height, intptr_t mx, intptr_t my, int width)
+{
+    /* Unfiltered bi-prediction: upscale each source pixel, add src2, round, shift, clip. mx/my are not read. */
+    const ptrdiff_t src_step = _srcstride / sizeof(pixel);
+    const ptrdiff_t dst_step = _dststride / sizeof(pixel);
+    const pixel *src_row     = (const pixel *)_src;
+    pixel *dst_row           = (pixel *)_dst;
+    const int up    = 14 - BIT_DEPTH;
+    const int shift = 14 + 1 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    const int rnd = 1 << (shift - 1);
+#else
+    const int rnd = 0;
+#endif
+
+    for (int row = 0; row < height; row++) {
+        for (int col = 0; col < width; col++)
+            dst_row[col] = av_clip_pixel(((src_row[col] << up) + src2[col] + rnd) >> shift);
+        src_row += src_step;
+        dst_row += dst_step;
+        src2    += MAX_PB_SIZE;
+    }
+}
+
+static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
+                                           const int16_t *src2,
+                                           int height, int denom, int wx0, int wx1,
+                                           int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    /* Unfiltered weighted bi-prediction: wx1 scales the upscaled source pixel, wx0 scales src2. mx/my are not read. */
+    const ptrdiff_t src_step = _srcstride / sizeof(pixel);
+    const ptrdiff_t dst_step = _dststride / sizeof(pixel);
+    const pixel *src_row     = (const pixel *)_src;
+    pixel *dst_row           = (pixel *)_dst;
+    const int shift  = 14 + 1 - BIT_DEPTH;
+    const int log2Wd = denom + shift - 1;
+
+    ox0 *= 1 << (BIT_DEPTH - 8);
+    ox1 *= 1 << (BIT_DEPTH - 8);
+    for (int row = 0; row < height; row++) {
+        for (int col = 0; col < width; col++) {
+            const int pred = src_row[col] << (14 - BIT_DEPTH);
+            dst_row[col] = av_clip_pixel((pred * wx1 + src2[col] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
+        }
+        src_row += src_step;
+        dst_row += dst_step;
+        src2    += MAX_PB_SIZE;
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// 8-tap (qpel) bi-prediction: plain and weighted, h / v / hv variants
+////////////////////////////////////////////////////////////////////////////////
+#define QPEL_FILTER(src, stride) /* 8-tap sum around src[x]; reads `filter` and `x` from the calling scope */ \
+    (filter[0] * src[x - 3 * stride] +                                         \
+     filter[1] * src[x - 2 * stride] +                                         \
+     filter[2] * src[x -     stride] +                                         \
+     filter[3] * src[x             ] +                                         \
+     filter[4] * src[x +     stride] +                                         \
+     filter[5] * src[x + 2 * stride] +                                         \
+     filter[6] * src[x + 3 * stride] +                                         \
+     filter[7] * src[x + 4 * stride]) /* taps span x - 3 * stride .. x + 4 * stride */
+/* Horizontal 8-tap bi-prediction: QPEL_FILTER along x with filter row mx, plus src2, rounded, shifted and clipped into _dst. */
+static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
+                                     const int16_t *src2, /* second operand; advanced by MAX_PB_SIZE elements per row */
+                                     int height, intptr_t mx, intptr_t my, int width) /* my is not read here */
+{
+    int x, y;
+    const pixel  *src       = (const pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+
+    const int8_t *filter    = ff_hevc_qpel_filters[mx]; /* must be named `filter`: QPEL_FILTER refers to it */
+
+    int shift = 14  + 1 - BIT_DEPTH; /* final right shift applied to the sum of both operands */
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); /* rounding term: half of 1 << shift */
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) /* QPEL_FILTER also reads the loop variable `x` */
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
+        src  += srcstride;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* Vertical 8-tap bi-prediction: QPEL_FILTER along y (tap stride srcstride) with filter row my, plus src2, rounded, shifted and clipped into _dst. */
+static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
+                                     const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, /* src2 advances MAX_PB_SIZE elements per row */
+                                     int height, intptr_t mx, intptr_t my, int width) /* mx is not read here */
+{
+    int x, y;
+    const pixel  *src       = (const pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+
+    const int8_t *filter    = ff_hevc_qpel_filters[my]; /* must be named `filter`: QPEL_FILTER refers to it */
+
+    int shift = 14 + 1 - BIT_DEPTH; /* final right shift applied to the sum of both operands */
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); /* rounding term: half of 1 << shift */
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) /* QPEL_FILTER also reads the loop variable `x`; taps reach 3 rows up and 4 rows down */
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
+        src  += srcstride;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* 2-D 8-tap bi-prediction: horizontal pass (filter row mx) into tmp_array, vertical pass (filter row my) over it, plus src2, rounded, shifted and clipped. */
+static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
+                                      const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                                      int height, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    const int8_t *filter; /* reassigned between the passes; QPEL_FILTER refers to it by name */
+    const pixel *src = (const pixel*)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; /* MAX_PB_SIZE + QPEL_EXTRA rows of MAX_PB_SIZE intermediates */
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); /* rounding term: half of 1 << shift */
+#else
+    int offset = 0;
+#endif
+
+    src   -= QPEL_EXTRA_BEFORE * srcstride; /* start QPEL_EXTRA_BEFORE rows above the block for the vertical taps */
+    filter = ff_hevc_qpel_filters[mx];
+    for (y = 0; y < height + QPEL_EXTRA; y++) { /* pass 1: horizontal filter over height + QPEL_EXTRA rows */
+        for (x = 0; x < width; x++)
+            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; /* intermediate row matching the first output row */
+    filter = ff_hevc_qpel_filters[my];
+
+    for (y = 0; y < height; y++) { /* pass 2: vertical filter over tmp, row stride MAX_PB_SIZE */
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* Horizontal 8-tap weighted bi-prediction: wx1 scales the filtered sample (filter row mx), wx0 scales src2; offsets are added before the final shift and clip. */
+static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
+                                       const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, /* src2 advances MAX_PB_SIZE elements per row */
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width) /* my is not read here */
+{
+    int x, y;
+    const pixel  *src       = (const pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+
+    const int8_t *filter    = ff_hevc_qpel_filters[mx]; /* must be named `filter`: QPEL_FILTER refers to it */
+
+    int shift = 14  + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* the final shift below is log2Wd + 1 */
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); /* offsets are scaled by the bit depth above 8 */
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) /* QPEL_FILTER also reads the loop variable `x` */
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); /* the "+ 1" supplies the rounding half */
+        src  += srcstride;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* Vertical 8-tap weighted bi-prediction: wx1 scales the filtered sample (filter row my), wx0 scales src2; offsets are added before the final shift and clip. */
+static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
+                                       const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, /* src2 advances MAX_PB_SIZE elements per row */
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width) /* mx is not read here */
+{
+    int x, y;
+    const pixel  *src       = (const pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+
+    const int8_t *filter    = ff_hevc_qpel_filters[my]; /* must be named `filter`: QPEL_FILTER refers to it */
+
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* the final shift below is log2Wd + 1 */
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); /* offsets are scaled by the bit depth above 8 */
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) /* QPEL_FILTER also reads the loop variable `x` */
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); /* the "+ 1" supplies the rounding half */
+        src  += srcstride;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* 2-D 8-tap weighted bi-prediction: horizontal pass (row mx) into tmp_array, vertical pass (row my), then wx1 * filtered + wx0 * src2 plus offsets, shifted and clipped. */
+static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
+                                        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                                        int height, int denom, int wx0, int wx1,
+                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    const int8_t *filter; /* reassigned between the passes; QPEL_FILTER refers to it by name */
+    const pixel *src = (const pixel*)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; /* MAX_PB_SIZE + QPEL_EXTRA rows of MAX_PB_SIZE intermediates */
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* the final shift below is log2Wd + 1 */
+
+    src   -= QPEL_EXTRA_BEFORE * srcstride; /* start QPEL_EXTRA_BEFORE rows above the block for the vertical taps */
+    filter = ff_hevc_qpel_filters[mx];
+    for (y = 0; y < height + QPEL_EXTRA; y++) { /* pass 1: horizontal filter over height + QPEL_EXTRA rows */
+        for (x = 0; x < width; x++)
+            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; /* intermediate row matching the first output row */
+    filter = ff_hevc_qpel_filters[my];
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); /* offsets are scaled by the bit depth above 8 */
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) { /* pass 2: vertical filter over tmp, then weighting */
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); /* the "+ 1" supplies the rounding half */
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// 4-tap (epel) bi-prediction: plain and weighted, h / v / hv variants
+////////////////////////////////////////////////////////////////////////////////
+#define EPEL_FILTER(src, stride) /* 4-tap sum around src[x]; reads `filter` and `x` from the calling scope */ \
+    (filter[0] * src[x - stride] +                                             \
+     filter[1] * src[x]          +                                             \
+     filter[2] * src[x + stride] +                                             \
+     filter[3] * src[x + 2 * stride]) /* taps span x - stride .. x + 2 * stride */
+/* Horizontal 4-tap bi-prediction: EPEL_FILTER along x with filter row mx, plus src2, rounded, shifted and clipped into _dst. */
+static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride,
+                                     const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, /* src2 advances MAX_PB_SIZE elements per row */
+                                     int height, intptr_t mx, intptr_t my, int width) /* my is not read here */
+{
+    int x, y;
+    const pixel *src = (const pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx]; /* must be named `filter`: EPEL_FILTER refers to it */
+    int shift = 14 + 1 - BIT_DEPTH; /* final right shift applied to the sum of both operands */
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); /* rounding term: half of 1 << shift */
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) { /* EPEL_FILTER also reads the loop variable `x` */
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
+        }
+        dst  += dststride;
+        src  += srcstride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* Vertical 4-tap bi-prediction: EPEL_FILTER along y (tap stride srcstride) with filter row my, plus src2, rounded, shifted and clipped into _dst. */
+static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
+                                     const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, /* src2 advances MAX_PB_SIZE elements per row */
+                                     int height, intptr_t mx, intptr_t my, int width) /* mx is not read here */
+{
+    int x, y;
+    const pixel *src = (const pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    const int8_t *filter = ff_hevc_epel_filters[my]; /* must be named `filter`: EPEL_FILTER refers to it */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    int shift = 14 + 1 - BIT_DEPTH; /* final right shift applied to the sum of both operands */
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); /* rounding term: half of 1 << shift */
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) /* EPEL_FILTER also reads the loop variable `x`; taps reach 1 row up and 2 rows down */
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
+        dst  += dststride;
+        src  += srcstride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* 2-D 4-tap bi-prediction: horizontal pass (filter row mx) into tmp_array, vertical pass (filter row my) over it, plus src2, rounded, shifted and clipped. */
+static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
+                                      const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                                      int height, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    const pixel *src = (const pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx]; /* first-pass filter; reassigned below; EPEL_FILTER refers to it by name */
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; /* MAX_PB_SIZE + EPEL_EXTRA rows of MAX_PB_SIZE intermediates */
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); /* rounding term: half of 1 << shift */
+#else
+    int offset = 0;
+#endif
+
+    src -= EPEL_EXTRA_BEFORE * srcstride; /* start EPEL_EXTRA_BEFORE rows above the block for the vertical taps */
+
+    for (y = 0; y < height + EPEL_EXTRA; y++) { /* pass 1: horizontal filter over height + EPEL_EXTRA rows */
+        for (x = 0; x < width; x++)
+            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; /* intermediate row matching the first output row */
+    filter = ff_hevc_epel_filters[my];
+
+    for (y = 0; y < height; y++) { /* pass 2: vertical filter over tmp, row stride MAX_PB_SIZE */
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* Horizontal 4-tap weighted bi-prediction: wx1 scales the filtered sample (filter row mx), wx0 scales src2; offsets are added before the final shift and clip. */
+static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
+                                       const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, /* src2 advances MAX_PB_SIZE elements per row */
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width) /* my is not read here */
+{
+    int x, y;
+    const pixel *src = (const pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx]; /* must be named `filter`: EPEL_FILTER refers to it */
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* the final shift below is log2Wd + 1 */
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); /* offsets are scaled by the bit depth above 8 */
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) /* EPEL_FILTER also reads the loop variable `x` */
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); /* the "+ 1" supplies the rounding half */
+        src  += srcstride;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* Vertical 4-tap weighted bi-prediction: wx1 scales the filtered sample (filter row my), wx0 scales src2; offsets are added before the final shift and clip. */
+static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
+                                       const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, /* src2 advances MAX_PB_SIZE elements per row */
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width) /* mx is not read here */
+{
+    int x, y;
+    const pixel *src = (const pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel); /* strides are converted to pixel units */
+    const int8_t *filter = ff_hevc_epel_filters[my]; /* must be named `filter`: EPEL_FILTER refers to it */
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* the final shift below is log2Wd + 1 */
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); /* offsets are scaled by the bit depth above 8 */
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) /* EPEL_FILTER also reads the loop variable `x` */
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); /* the "+ 1" supplies the rounding half */
+        src  += srcstride;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+/* 2-D 4-tap weighted bi-prediction: horizontal pass (row mx) into tmp_array, vertical pass (row my), then wx1 * filtered + wx0 * src2 plus offsets, shifted and clipped. */
+static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
+                                        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                                        int height, int denom, int wx0, int wx1,
+                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    const pixel *src = (const pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx]; /* first-pass filter; reassigned below; EPEL_FILTER refers to it by name */
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; /* MAX_PB_SIZE + EPEL_EXTRA rows of MAX_PB_SIZE intermediates */
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* the final shift below is log2Wd + 1 */
+
+    src -= EPEL_EXTRA_BEFORE * srcstride; /* start EPEL_EXTRA_BEFORE rows above the block for the vertical taps */
+
+    for (y = 0; y < height + EPEL_EXTRA; y++) { /* pass 1: horizontal filter over height + EPEL_EXTRA rows */
+        for (x = 0; x < width; x++)
+            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; /* intermediate row matching the first output row */
+    filter = ff_hevc_epel_filters[my];
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); /* offsets are scaled by the bit depth above 8 */
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) { /* pass 2: vertical filter over tmp, then weighting */
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); /* the "+ 1" supplies the rounding half */
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += MAX_PB_SIZE;
+    }
+}
+
+// line zero: P3..P0 sit before pix, Q0..Q3 start at pix; all step by xstride and need pix/xstride in scope
+#define P3 pix[-4 * xstride]
+#define P2 pix[-3 * xstride]
+#define P1 pix[-2 * xstride]
+#define P0 pix[-1 * xstride]
+#define Q0 pix[0 * xstride]
+#define Q1 pix[1 * xstride]
+#define Q2 pix[2 * xstride]
+#define Q3 pix[3 * xstride]
+
+// line three (the same samples 3 * ystride further on); used only for the deblocking decision
+#define TP3 pix[-4 * xstride + 3 * ystride]
+#define TP2 pix[-3 * xstride + 3 * ystride]
+#define TP1 pix[-2 * xstride + 3 * ystride]
+#define TP0 pix[-1 * xstride + 3 * ystride]
+#define TQ0 pix[0  * xstride + 3 * ystride]
+#define TQ1 pix[1  * xstride + 3 * ystride]
+#define TQ2 pix[2  * xstride + 3 * ystride]
+#define TQ3 pix[3  * xstride + 3 * ystride]
+
+#include "h26x/h2656_deblock_template.c" /* NOTE(review): presumably defines the FUNC(loop_filter_*) helpers called below -- confirm */
+/* Luma edge deblocking: for each of two segments spaced 4 lines apart, picks no filtering, the strong filter or the weak filter. */
+static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
+                                        ptrdiff_t _xstride, ptrdiff_t _ystride, /* both are divided by sizeof(pixel) below */
+                                        int beta, const int *_tc, /* _tc, _no_p and _no_q are read at index 0 and 1, one per segment */
+                                        const uint8_t *_no_p, const uint8_t *_no_q)
+{
+    ptrdiff_t xstride = _xstride / sizeof(pixel);
+    ptrdiff_t ystride = _ystride / sizeof(pixel);
+
+    beta <<= BIT_DEPTH - 8; /* threshold scaled by the bit depth above 8 */
+
+    for (int j = 0; j < 2; j++) {
+        pixel* pix     = (pixel*)_pix + j * 4 * ystride; /* the P/Q and TP/TQ macros index relative to this pointer */
+        const int dp0  = abs(P2  - 2 * P1  + P0); /* second differences: dp = P side, dq = Q side; 0 = line zero, 3 = line three */
+        const int dq0  = abs(Q2  - 2 * Q1  + Q0);
+        const int dp3  = abs(TP2 - 2 * TP1 + TP0);
+        const int dq3  = abs(TQ2 - 2 * TQ1 + TQ0);
+        const int d0   = dp0 + dq0;
+        const int d3   = dp3 + dq3;
+        const int tc   = _tc[j]   << (BIT_DEPTH - 8); /* scaled like beta */
+        const int no_p = _no_p[j];
+        const int no_q = _no_q[j];
+
+        if (d0 + d3 < beta) { /* otherwise this segment is left unfiltered */
+            const int beta_3 = beta >> 3;
+            const int beta_2 = beta >> 2;
+            const int tc25   = ((tc * 5 + 1) >> 1); /* 2.5 * tc, rounded */
+
+            if (abs(P3  -  P0) + abs(Q3  -  Q0) < beta_3 && abs(P0  -  Q0) < tc25 &&
+                abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
+                                      (d0 << 1) < beta_2 &&      (d3 << 1) < beta_2) { /* every test must hold on both line zero and line three */
+                const int tc2 = tc << 1;
+                FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q);
+            } else {
+                int nd_p = 1; /* nd_p / nd_q go to the weak filter; raised to 2 when that side's activity is under (beta + beta / 2) >> 3 */
+                int nd_q = 1;
+                if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
+                    nd_p = 2;
+                if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
+                    nd_q = 2;
+                FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
+            }
+        }
+    }
+}
+
+static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
+                                          ptrdiff_t _ystride, const int *_tc,
+                                          const uint8_t *_no_p, const uint8_t *_no_q)
+{
+    /* Two segments of `size` lines per call; a segment is filtered only when its scaled tc is positive. */
+    const ptrdiff_t xstride = _xstride / sizeof(pixel);
+    const ptrdiff_t ystride = _ystride / sizeof(pixel);
+    const int size          = 4;
+
+    for (int seg = 0; seg < 2; seg++) {
+        pixel *const pix = (pixel *)_pix + seg * size * ystride;
+        const int tc     = _tc[seg] << (BIT_DEPTH - 8);
+        int no_p, no_q;
+        if (tc <= 0)
+            continue;
+        no_p = _no_p[seg];
+        no_q = _no_q[seg];
+        FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
+    }
+}
+/* Chroma deblocking entry point that passes `stride` as the x stride and one pixel as the y stride. */
+static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
+                                            const int32_t *tc, const uint8_t *no_p,
+                                            const uint8_t *no_q)
+{
+    FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q); /* argument order is (xstride, ystride) */
+}
+/* Chroma deblocking entry point that passes one pixel as the x stride and `stride` as the y stride. */
+static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
+                                            const int32_t *tc, const uint8_t *no_p,
+                                            const uint8_t *no_q)
+{
+    FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q); /* argument order is (xstride, ystride) */
+}
+/* Luma deblocking entry point that passes `stride` as the x stride (step across the edge) and one pixel as the y stride. */
+static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
+                                          int beta, const int32_t *tc, const uint8_t *no_p,
+                                          const uint8_t *no_q)
+{
+    FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel), /* argument order is (xstride, ystride) */
+                                beta, tc, no_p, no_q);
+}
+/* Luma deblocking entry point that passes one pixel as the x stride (step across the edge) and `stride` as the y stride. */
+static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
+                                          int beta, const int32_t *tc, const uint8_t *no_p,
+                                          const uint8_t *no_q)
+{
+    FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride, /* argument order is (xstride, ystride) */
+                                beta, tc, no_p, no_q);
+}
+
+#undef P3 /* drop the line-zero sample accessors */
+#undef P2
+#undef P1
+#undef P0
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+
+#undef TP3 /* drop the line-three sample accessors */
+#undef TP2
+#undef TP1
+#undef TP0
+#undef TQ0
+#undef TQ1
+#undef TQ2
+#undef TQ3
diff --git a/libavcodec/hevc/filter.c b/libavcodec/hevc/filter.c
new file mode 100644
index 0000000000..db7525170d
--- /dev/null
+++ b/libavcodec/hevc/filter.c
@@ -0,0 +1,893 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2013 Seppo Tomperi
+ * Copyright (C) 2013 Wassim Hamidouche
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+
+#include "hevcdec.h"
+#include "progressframe.h"
+
+#define LUMA 0
+#define CB 1
+#define CR 2
+
+static const uint8_t tctable[54] = { // looked up in chroma_tc() with an index clipped to 0..53
+    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 1, // QP  0...18
+    1, 1, 1, 1, 1, 1, 1,  1,  2,  2,  2,  2,  3,  3,  3,  3, 4, 4, 4, // QP 19...37
+    5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24           // QP 38...53
+};
+
+static const uint8_t betatable[52] = { // looked up in deblocking_filter_CTB() with av_clip(qp + beta_offset, 0, MAX_QP)
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  7,  8, // QP 0...18
+     9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, // QP 19...37
+    38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64                      // QP 38...51
+};
+
+static int chroma_tc(const HEVCContext *s, int qp_y, int c_idx, int tc_offset)
+{ // returns the tctable entry for a chroma edge, derived from luma QP qp_y, the PPS chroma offset and tc_offset
+    static const int qp_c[] = { // chroma QP for qp_i in 30..43, used only when chroma_format_idc == 1
+        29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
+    };
+    int qp, qp_i, offset, idxt;
+
+    // slice qp offset is not used for deblocking; only the PPS offset is added
+    if (c_idx == 1) // c_idx 1 takes the Cb offset, any other value the Cr offset
+        offset = s->ps.pps->cb_qp_offset;
+    else
+        offset = s->ps.pps->cr_qp_offset;
+
+    qp_i = av_clip(qp_y + offset, 0, 57);
+    if (s->ps.sps->chroma_format_idc == 1) {
+        if (qp_i < 30) // identity below the table range
+            qp = qp_i;
+        else if (qp_i > 43) // constant shift above the table range
+            qp = qp_i - 6;
+        else
+            qp = qp_c[qp_i - 30];
+    } else {
+        qp = av_clip(qp_i, 0, 51);
+    }
+
+    idxt = av_clip(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53); // 53 is the last valid tctable index
+    return tctable[idxt];
+}
+
+static int get_qPy_pred(HEVCLocalContext *lc, const HEVCContext *s,
+                        int xBase, int yBase, int log2_cb_size)
+{ // returns the rounded mean of the left (A) and above (B) luma QP candidates; log2_cb_size is not used here
+    int ctb_size_mask        = (1 << s->ps.sps->log2_ctb_size) - 1;
+    int MinCuQpDeltaSizeMask = (1 << (s->ps.sps->log2_ctb_size -
+                                      s->ps.pps->diff_cu_qp_delta_depth)) - 1;
+    int xQgBase              = xBase - (xBase & MinCuQpDeltaSizeMask); // xBase rounded down to the mask's granularity
+    int yQgBase              = yBase - (yBase & MinCuQpDeltaSizeMask); // same for yBase
+    int min_cb_width         = s->ps.sps->min_cb_width;
+    int x_cb                 = xQgBase >> s->ps.sps->log2_min_cb_size;
+    int y_cb                 = yQgBase >> s->ps.sps->log2_min_cb_size;
+    int availableA           = (xBase   & ctb_size_mask) &&
+                               (xQgBase & ctb_size_mask); // left entry is read only if both x offsets inside the CTB are non-zero
+    int availableB           = (yBase   & ctb_size_mask) &&
+                               (yQgBase & ctb_size_mask); // same test vertically for the entry above
+    int qPy_pred, qPy_a, qPy_b;
+
+    // qPy_pred: slice QP for the first group or the group at (0, 0), otherwise the value kept in lc
+    if (lc->first_qp_group || (!xQgBase && !yQgBase)) {
+        lc->first_qp_group = !lc->tu.is_cu_qp_delta_coded; // side effect: flag is updated here
+        qPy_pred = s->sh.slice_qp;
+    } else {
+        qPy_pred = lc->qPy_pred;
+    }
+
+    // qPy_a: QP stored for the min coding block to the left, or qPy_pred as fallback
+    if (availableA == 0)
+        qPy_a = qPy_pred;
+    else
+        qPy_a = s->qp_y_tab[(x_cb - 1) + y_cb * min_cb_width];
+
+    // qPy_b: QP stored for the min coding block above, or qPy_pred as fallback
+    if (availableB == 0)
+        qPy_b = qPy_pred;
+    else
+        qPy_b = s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width];
+
+    av_assert2(qPy_a >= -s->ps.sps->qp_bd_offset && qPy_a < 52);
+    av_assert2(qPy_b >= -s->ps.sps->qp_bd_offset && qPy_b < 52);
+
+    return (qPy_a + qPy_b + 1) >> 1;
+}
+
+void ff_hevc_set_qPy(HEVCLocalContext *lc, int xBase, int yBase, int log2_cb_size)
+{ // stores in lc->qp_y the predicted luma QP, adjusted by lc->tu.cu_qp_delta when that is non-zero
+    const HEVCContext *const s = lc->parent;
+    int qp_y = get_qPy_pred(lc, s, xBase, yBase, log2_cb_size);
+
+    if (lc->tu.cu_qp_delta != 0) {
+        int off = s->ps.sps->qp_bd_offset;
+        lc->qp_y = FFUMOD(qp_y + lc->tu.cu_qp_delta + 52 + 2 * off, // NOTE(review): FFUMOD is defined elsewhere; presumably a
+                                 52 + off) - off;                    // non-negative modulo, giving a value in [-off, 51] - confirm
+    } else
+        lc->qp_y = qp_y;
+}
+
+static int get_qPy(const HEVCContext *s, int xC, int yC)
+{ // returns the qp_y_tab entry of the min-size coding block that contains sample (xC, yC)
+    int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
+    int x                 = xC >> log2_min_cb_size; // sample position -> min coding block column
+    int y                 = yC >> log2_min_cb_size; // sample position -> min coding block row
+    return s->qp_y_tab[x + y * s->ps.sps->min_cb_width];
+}
+
+static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
+                     ptrdiff_t stride_dst, ptrdiff_t stride_src)
+{ // copies `height` rows of `width` bytes; callers in this file pass width already scaled by pixel_shift
+    int i, j;
+
+    if (((intptr_t)dst | (intptr_t)src | stride_dst | stride_src) & 15) { // some pointer/stride is not a multiple of 16
+        for (i = 0; i < height; i++) {
+            for (j = 0; j < width - 7; j+=8) // whole 8-byte steps; AV_COPY64U is presumably an unaligned 8-byte copy
+                AV_COPY64U(dst+j, src+j);
+            dst += stride_dst;
+            src += stride_src;
+        }
+        if (width&7) { // leftover bytes: rewind to the first row and skip the part already copied
+            dst += ((width>>3)<<3) - stride_dst * height;
+            src += ((width>>3)<<3) - stride_src * height;
+            width &= 7;
+            for (i = 0; i < height; i++) {
+                for (j = 0; j < width; j++) // byte-by-byte tail
+                    dst[j] = src[j];
+                dst += stride_dst;
+                src += stride_src;
+            }
+        }
+    } else { // both pointers and both strides are multiples of 16
+        for (i = 0; i < height; i++) {
+            for (j = 0; j < width; j+=16) // NOTE(review): steps of 16, so a row may be copied past `width` - confirm buffers allow it
+                AV_COPY128(dst+j, src+j);
+            dst += stride_dst;
+            src += stride_src;
+        }
+    }
+}
+
+static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
+{ // copies one sample from src to dst: 2 bytes when pixel_shift is non-zero, 1 byte otherwise
+    if (pixel_shift)
+        *(uint16_t *)dst = *(const uint16_t *)src; // read through a const pointer: src's qualifier is kept
+    else
+        *dst = *src;
+}
+
+static void copy_vert(uint8_t *dst, const uint8_t *src,
+                      int pixel_shift, int height,
+                      ptrdiff_t stride_dst, ptrdiff_t stride_src)
+{ // copies `height` samples, advancing dst by stride_dst bytes and src by stride_src bytes after each one
+    int i;
+    if (pixel_shift == 0) { // one byte per sample
+        for (i = 0; i < height; i++) {
+            *dst = *src;
+            dst += stride_dst;
+            src += stride_src;
+        }
+    } else { // two bytes per sample
+        for (i = 0; i < height; i++) {
+            *(uint16_t *)dst = *(const uint16_t *)src; // read through a const pointer: src's qualifier is kept
+            dst += stride_dst;
+            src += stride_src;
+        }
+    }
+}
+
+static void copy_CTB_to_hv(const HEVCContext *s, const uint8_t *src,
+                           ptrdiff_t stride_src, int x, int y, int width, int height,
+                           int c_idx, int x_ctb, int y_ctb)
+{ // saves the block's first/last row and first/last column into the per-plane sao_pixel_buffer_h / _v
+    int sh = s->ps.sps->pixel_shift;
+    int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];  // plane width in samples
+    int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx]; // plane height in samples
+
+    /* copy horizontal edges: top row goes to line 2 * y_ctb, bottom row to line 2 * y_ctb + 1 */
+    memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh),
+        src, width << sh);
+    memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh),
+        src + stride_src * (height - 1), width << sh);
+
+    /* copy vertical edges: left column goes to line 2 * x_ctb, right column to line 2 * x_ctb + 1 */
+    copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src);
+
+    copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src);
+}
+
+static void restore_tqb_pixels(const HEVCContext *s,
+                               uint8_t *src1, const uint8_t *dst1,
+                               ptrdiff_t stride_src, ptrdiff_t stride_dst,
+                               int x0, int y0, int width, int height, int c_idx)
+{ // for every min PU flagged in is_pcm, copies its samples from dst1 back into src1 (note the direction)
+    if ( s->ps.pps->transquant_bypass_enable_flag ||
+            (s->ps.sps->pcm_loop_filter_disabled && s->ps.sps->pcm_enabled)) { // otherwise nothing to restore
+        int x, y;
+        int min_pu_size  = 1 << s->ps.sps->log2_min_pu_size;
+        int hshift       = s->ps.sps->hshift[c_idx];
+        int vshift       = s->ps.sps->vshift[c_idx];
+        int x_min        = ((x0         ) >> s->ps.sps->log2_min_pu_size); // x0, y0 are shifted by hshift/vshift below,
+        int y_min        = ((y0         ) >> s->ps.sps->log2_min_pu_size); // i.e. they are full-resolution coordinates
+        int x_max        = ((x0 + width ) >> s->ps.sps->log2_min_pu_size); // NOTE(review): callers in this file pass width and
+        int y_max        = ((y0 + height) >> s->ps.sps->log2_min_pu_size); // height in plane samples - confirm range for chroma
+        int len          = (min_pu_size >> hshift) << s->ps.sps->pixel_shift; // bytes per restored row in this plane
+        for (y = y_min; y < y_max; y++) {
+            for (x = x_min; x < x_max; x++) {
+                if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
+                    int n;
+                    uint8_t *src = src1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
+                    const uint8_t *dst = dst1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
+                    for (n = 0; n < (min_pu_size >> vshift); n++) { // one memcpy per row of the PU in this plane
+                        memcpy(src, dst, len);
+                        src += stride_src;
+                        dst += stride_dst;
+                    }
+                }
+            }
+        }
+    }
+}
+
+#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)]) /* raster-order lookup by CTB column/row; needs `s` in scope */
+
+static void sao_filter_CTB(HEVCLocalContext *lc, const HEVCContext *s, int x, int y)
+{ // runs the band or edge SAO DSP function on every plane of the CTB at full-resolution position (x, y)
+    static const uint8_t sao_tab[8] = { 0, 1, 2, 2, 3, 3, 4, 4 }; // (8-aligned width / 8 - 1) -> DSP table index
+    int c_idx;
+    int edges[4];  // 0 left 1 top 2 right 3 bottom
+    int x_ctb                = x >> s->ps.sps->log2_ctb_size;
+    int y_ctb                = y >> s->ps.sps->log2_ctb_size;
+    int ctb_addr_rs          = y_ctb * s->ps.sps->ctb_width + x_ctb;
+    int ctb_addr_ts          = s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
+    SAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb);
+    // flags indicating unfilterable edges
+    uint8_t vert_edge[]      = { 0, 0 };       // 0 left, 1 right
+    uint8_t horiz_edge[]     = { 0, 0 };       // 0 top, 1 bottom
+    uint8_t diag_edge[]      = { 0, 0, 0, 0 }; // 0 top-left, 1 top-right, 2 bottom-right, 3 bottom-left
+    uint8_t lfase            = CTB(s->filter_slice_edges, x_ctb, y_ctb);
+    uint8_t no_tile_filter   = s->ps.pps->tiles_enabled_flag &&
+                               !s->ps.pps->loop_filter_across_tiles_enabled_flag;
+    uint8_t restore          = no_tile_filter || !lfase; // also selects the sao_edge_restore[] variant below
+    uint8_t left_tile_edge   = 0;
+    uint8_t right_tile_edge  = 0;
+    uint8_t up_tile_edge     = 0;
+    uint8_t bottom_tile_edge = 0;
+
+    edges[0]   = x_ctb == 0;
+    edges[1]   = y_ctb == 0;
+    edges[2]   = x_ctb == s->ps.sps->ctb_width  - 1;
+    edges[3]   = y_ctb == s->ps.sps->ctb_height - 1;
+
+    if (restore) { // flag each neighbour that lies in another tile, or in another slice address when !lfase
+        if (!edges[0]) {
+            left_tile_edge  = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
+            vert_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge;
+        }
+        if (!edges[2]) {
+            right_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]];
+            vert_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge;
+        }
+        if (!edges[1]) {
+            up_tile_edge     = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]];
+            horiz_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge;
+        }
+        if (!edges[3]) {
+            bottom_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->ps.sps->ctb_width]];
+            horiz_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge;
+        }
+        if (!edges[0] && !edges[1]) {
+            diag_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge;
+        }
+        if (!edges[1] && !edges[2]) {
+            diag_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge;
+        }
+        if (!edges[2] && !edges[3]) {
+            diag_edge[2] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge;
+        }
+        if (!edges[0] && !edges[3]) {
+            diag_edge[3] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge;
+        }
+    }
+
+    for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) { // one plane only when chroma_format_idc is 0
+        int x0       = x >> s->ps.sps->hshift[c_idx]; // block origin in this plane's samples
+        int y0       = y >> s->ps.sps->vshift[c_idx];
+        ptrdiff_t stride_src = s->cur_frame->f->linesize[c_idx];
+        int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx];
+        int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->vshift[c_idx];
+        int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> s->ps.sps->hshift[c_idx]) - x0); // clipped at the plane's right border
+        int height   = FFMIN(ctb_size_v, (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0); // clipped at the plane's bottom border
+        int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
+        uint8_t *src = &s->cur_frame->f->data[c_idx][y0 * stride_src + (x0 << s->ps.sps->pixel_shift)];
+        ptrdiff_t stride_dst;
+        uint8_t *dst;
+
+        switch (sao->type_idx[c_idx]) { // any type other than SAO_BAND / SAO_EDGE leaves the plane untouched
+        case SAO_BAND:
+            copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
+                           x_ctb, y_ctb); // border samples are saved before the plane is modified
+            if (s->ps.pps->transquant_bypass_enable_flag ||
+                (s->ps.sps->pcm_loop_filter_disabled && s->ps.sps->pcm_enabled)) {
+                dst = lc->edge_emu_buffer; // keep a copy of the input so flagged PUs can be restored afterwards
+                stride_dst = 2*MAX_PB_SIZE;
+                copy_CTB(dst, src, width << s->ps.sps->pixel_shift, height, stride_dst, stride_src);
+                s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
+                                                sao->offset_val[c_idx], sao->band_position[c_idx],
+                                                width, height);
+                restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
+                                   x, y, width, height, c_idx);
+            } else { // the same buffer is passed as both of the filter's pointer arguments
+                s->hevcdsp.sao_band_filter[tab](src, src, stride_src, stride_src,
+                                                sao->offset_val[c_idx], sao->band_position[c_idx],
+                                                width, height);
+            }
+            sao->type_idx[c_idx] = SAO_APPLIED; // later CTBs test this to pick saved border samples over the frame
+            break;
+        case SAO_EDGE:
+        {
+            int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
+            int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
+            int left_edge = edges[0];
+            int top_edge = edges[1];
+            int right_edge = edges[2];
+            int bottom_edge = edges[3];
+            int sh = s->ps.sps->pixel_shift;
+            int left_pixels, right_pixels;
+
+            stride_dst = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
+            dst = lc->edge_emu_buffer + stride_dst + AV_INPUT_BUFFER_PADDING_SIZE; // leaves one row and some bytes before dst for the border
+
+            if (!top_edge) { // fill the row above the block, including its two corner samples
+                int left = 1 - left_edge;
+                int right = 1 - right_edge;
+                const uint8_t *src1[2]; // [0] current frame, [1] saved rows in sao_pixel_buffer_h
+                uint8_t *dst1;
+                int src_idx, pos;
+
+                dst1 = dst - stride_dst - (left << sh);
+                src1[0] = src - stride_src - (left << sh);
+                src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
+                pos = 0;
+                if (left) {
+                    src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
+                               SAO_APPLIED);
+                    copy_pixel(dst1, src1[src_idx], sh);
+                    pos += (1 << sh);
+                }
+                src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
+                           SAO_APPLIED);
+                memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
+                if (right) {
+                    pos += width << sh;
+                    src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
+                               SAO_APPLIED);
+                    copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
+                }
+            }
+            if (!bottom_edge) { // fill the row below the block the same way
+                int left = 1 - left_edge;
+                int right = 1 - right_edge;
+                const uint8_t *src1[2]; // [0] current frame, [1] saved rows in sao_pixel_buffer_h
+                uint8_t *dst1;
+                int src_idx, pos;
+
+                dst1 = dst + height * stride_dst - (left << sh);
+                src1[0] = src + height * stride_src - (left << sh);
+                src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
+                pos = 0;
+                if (left) {
+                    src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
+                               SAO_APPLIED);
+                    copy_pixel(dst1, src1[src_idx], sh);
+                    pos += (1 << sh);
+                }
+                src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
+                           SAO_APPLIED);
+                memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
+                if (right) {
+                    pos += width << sh;
+                    src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
+                               SAO_APPLIED);
+                    copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
+                }
+            }
+            left_pixels = 0; // set to 1 when the left column can be taken from the frame by the copy_CTB() below
+            if (!left_edge) {
+                if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
+                    copy_vert(dst - (1 << sh),
+                              s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
+                              sh, height, stride_dst, 1 << sh);
+                } else {
+                    left_pixels = 1;
+                }
+            }
+            right_pixels = 0; // same for the column to the right of the block
+            if (!right_edge) {
+                if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
+                    copy_vert(dst + (width << sh),
+                              s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
+                              sh, height, stride_dst, 1 << sh);
+                } else {
+                    right_pixels = 1;
+                }
+            }
+
+            copy_CTB(dst - (left_pixels << sh),
+                     src - (left_pixels << sh),
+                     (width + left_pixels + right_pixels) << sh,
+                     height, stride_dst, stride_src); // block body plus any side columns taken from the frame
+
+            copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
+                           x_ctb, y_ctb); // save this block's borders before src is overwritten
+            s->hevcdsp.sao_edge_filter[tab](src, dst, stride_src, sao->offset_val[c_idx],
+                                            sao->eo_class[c_idx], width, height); // note: no stride_dst argument is passed here
+            s->hevcdsp.sao_edge_restore[restore](src, dst,
+                                                stride_src, stride_dst,
+                                                sao,
+                                                edges, width,
+                                                height, c_idx,
+                                                vert_edge,
+                                                horiz_edge,
+                                                diag_edge);
+            restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
+                               x, y, width, height, c_idx);
+            sao->type_idx[c_idx] = SAO_APPLIED; // later CTBs test this to pick saved border samples over the frame
+            break;
+        }
+        }
+    }
+}
+
+static int get_pcm(const HEVCContext *s, int x, int y)
+{ // returns the is_pcm entry of the min PU containing (x, y), or 2 when the position is outside the PU grid
+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
+    int x_pu, y_pu;
+
+    if (x < 0 || y < 0) // left of / above the picture
+        return 2;
+
+    x_pu = x >> log2_min_pu_size;
+    y_pu = y >> log2_min_pu_size;
+
+    if (x_pu >= s->ps.sps->min_pu_width || y_pu >= s->ps.sps->min_pu_height) // right of / below the PU grid
+        return 2;
+    return s->is_pcm[y_pu * s->ps.sps->min_pu_width + x_pu];
+}
+
+#define TC_CALC(qp, bs)                                                 \
+    tctable[av_clip((qp) + DEFAULT_INTRA_TC_OFFSET * ((bs) - 1) +       \
+                    (tc_offset & -2),                                   \
+                    0, MAX_QP + DEFAULT_INTRA_TC_OFFSET)] /* reads a local `tc_offset`; its lowest bit is masked off */
+
+static void deblocking_filter_CTB(const HEVCContext *s, int x0, int y0)
+{ // deblocks the 8x8-grid edges of the CTB at (x0, y0) in the current frame: luma first, then each chroma plane
+    uint8_t **data     = s->cur_frame->f->data;
+    int      *linesize = s->cur_frame->f->linesize;
+
+    uint8_t *src;
+    int x, y;
+    int chroma, beta;
+    int32_t c_tc[2], tc[2]; // one tc value per half of an 8-sample edge
+    uint8_t no_p[2] = { 0 }; // get_pcm() results for the P side; only refreshed when pcmf is set
+    uint8_t no_q[2] = { 0 }; // same for the Q side
+
+    int log2_ctb_size = s->ps.sps->log2_ctb_size;
+    int x_end, x_end2, y_end;
+    int ctb_size        = 1 << log2_ctb_size;
+    int ctb             = (x0 >> log2_ctb_size) +
+                          (y0 >> log2_ctb_size) * s->ps.sps->ctb_width;
+    int cur_tc_offset   = s->deblock[ctb].tc_offset;
+    int cur_beta_offset = s->deblock[ctb].beta_offset;
+    int left_tc_offset, left_beta_offset;
+    int tc_offset, beta_offset;
+    int pcmf = (s->ps.sps->pcm_enabled &&
+                s->ps.sps->pcm_loop_filter_disabled) ||
+               s->ps.pps->transquant_bypass_enable_flag; // selects the *_c DSP entries and the no_p/no_q lookups
+
+    if (x0) { // offsets of the CTB to the left, used for the columns filtered on its behalf
+        left_tc_offset   = s->deblock[ctb - 1].tc_offset;
+        left_beta_offset = s->deblock[ctb - 1].beta_offset;
+    } else {
+        left_tc_offset   = 0;
+        left_beta_offset = 0;
+    }
+
+    x_end = x0 + ctb_size;
+    if (x_end > s->ps.sps->width) // clip to the picture
+        x_end = s->ps.sps->width;
+    y_end = y0 + ctb_size;
+    if (y_end > s->ps.sps->height)
+        y_end = s->ps.sps->height;
+
+    tc_offset   = cur_tc_offset;
+    beta_offset = cur_beta_offset;
+
+    x_end2 = x_end;
+    if (x_end2 != s->ps.sps->width) // horizontal pass stops 8 samples early unless at the picture's right border
+        x_end2 -= 8;
+    for (y = y0; y < y_end; y += 8) {
+        // vertical filtering luma (the edge at x == 0 is skipped)
+        for (x = x0 ? x0 : 8; x < x_end; x += 8) {
+            const int bs0 = s->vertical_bs[(x +  y      * s->bs_width) >> 2];
+            const int bs1 = s->vertical_bs[(x + (y + 4) * s->bs_width) >> 2];
+            if (bs0 || bs1) {
+                const int qp = (get_qPy(s, x - 1, y)     + get_qPy(s, x, y)     + 1) >> 1; // rounded mean of both sides
+
+                beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
+
+                tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
+                tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
+                src     = &data[LUMA][y * linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
+                if (pcmf) {
+                    no_p[0] = get_pcm(s, x - 1, y);
+                    no_p[1] = get_pcm(s, x - 1, y + 4);
+                    no_q[0] = get_pcm(s, x, y);
+                    no_q[1] = get_pcm(s, x, y + 4);
+                    s->hevcdsp.hevc_v_loop_filter_luma_c(src, linesize[LUMA],
+                                                         beta, tc, no_p, no_q);
+                } else
+                    s->hevcdsp.hevc_v_loop_filter_luma(src, linesize[LUMA],
+                                                       beta, tc, no_p, no_q);
+            }
+        }
+
+        if(!y) // no horizontal edge at the top of the picture
+             continue;
+
+        // horizontal filtering luma (starts 8 samples left of x0, i.e. inside the previous CTB)
+        for (x = x0 ? x0 - 8 : 0; x < x_end2; x += 8) {
+            const int bs0 = s->horizontal_bs[( x      + y * s->bs_width) >> 2];
+            const int bs1 = s->horizontal_bs[((x + 4) + y * s->bs_width) >> 2];
+            if (bs0 || bs1) {
+                const int qp = (get_qPy(s, x, y - 1)     + get_qPy(s, x, y)     + 1) >> 1;
+
+                tc_offset   = x >= x0 ? cur_tc_offset : left_tc_offset; // columns left of x0 use the left CTB's offsets
+                beta_offset = x >= x0 ? cur_beta_offset : left_beta_offset;
+
+                beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
+                tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
+                tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
+                src     = &data[LUMA][y * linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
+                if (pcmf) {
+                    no_p[0] = get_pcm(s, x, y - 1);
+                    no_p[1] = get_pcm(s, x + 4, y - 1);
+                    no_q[0] = get_pcm(s, x, y);
+                    no_q[1] = get_pcm(s, x + 4, y);
+                    s->hevcdsp.hevc_h_loop_filter_luma_c(src, linesize[LUMA],
+                                                         beta, tc, no_p, no_q);
+                } else
+                    s->hevcdsp.hevc_h_loop_filter_luma(src, linesize[LUMA],
+                                                       beta, tc, no_p, no_q);
+            }
+        }
+    }
+
+    if (s->ps.sps->chroma_format_idc) { // skipped when chroma_format_idc is 0
+        for (chroma = 1; chroma <= 2; chroma++) {
+            int h = 1 << s->ps.sps->hshift[chroma]; // horizontal subsampling factor of this plane
+            int v = 1 << s->ps.sps->vshift[chroma]; // vertical subsampling factor of this plane
+
+            // vertical filtering chroma; NOTE(review): tc_offset holds whatever earlier loops last assigned - confirm intended
+            for (y = y0; y < y_end; y += (8 * v)) {
+                for (x = x0 ? x0 : 8 * h; x < x_end; x += (8 * h)) {
+                    const int bs0 = s->vertical_bs[(x +  y            * s->bs_width) >> 2];
+                    const int bs1 = s->vertical_bs[(x + (y + (4 * v)) * s->bs_width) >> 2];
+
+                    if ((bs0 == 2) || (bs1 == 2)) { // chroma is filtered only where the strength is 2
+                        const int qp0 = (get_qPy(s, x - 1, y)           + get_qPy(s, x, y)           + 1) >> 1;
+                        const int qp1 = (get_qPy(s, x - 1, y + (4 * v)) + get_qPy(s, x, y + (4 * v)) + 1) >> 1;
+
+                        c_tc[0] = (bs0 == 2) ? chroma_tc(s, qp0, chroma, tc_offset) : 0;
+                        c_tc[1] = (bs1 == 2) ? chroma_tc(s, qp1, chroma, tc_offset) : 0;
+                        src       = &data[chroma][(y >> s->ps.sps->vshift[chroma]) * linesize[chroma] + ((x >> s->ps.sps->hshift[chroma]) << s->ps.sps->pixel_shift)];
+                        if (pcmf) {
+                            no_p[0] = get_pcm(s, x - 1, y);
+                            no_p[1] = get_pcm(s, x - 1, y + (4 * v));
+                            no_q[0] = get_pcm(s, x, y);
+                            no_q[1] = get_pcm(s, x, y + (4 * v));
+                            s->hevcdsp.hevc_v_loop_filter_chroma_c(src, linesize[chroma],
+                                                                   c_tc, no_p, no_q);
+                        } else
+                            s->hevcdsp.hevc_v_loop_filter_chroma(src, linesize[chroma],
+                                                                 c_tc, no_p, no_q);
+                    }
+                }
+
+                if(!y) // no horizontal edge at the top of the picture
+                    continue;
+
+                // horizontal filtering chroma
+                tc_offset = x0 ? left_tc_offset : cur_tc_offset;
+                x_end2 = x_end;
+                if (x_end != s->ps.sps->width)
+                    x_end2 = x_end - 8 * h;
+                for (x = x0 ? x0 - 8 * h : 0; x < x_end2; x += (8 * h)) {
+                    const int bs0 = s->horizontal_bs[( x          + y * s->bs_width) >> 2];
+                    const int bs1 = s->horizontal_bs[((x + 4 * h) + y * s->bs_width) >> 2];
+                    if ((bs0 == 2) || (bs1 == 2)) {
+                        const int qp0 = bs0 == 2 ? (get_qPy(s, x,           y - 1) + get_qPy(s, x,           y) + 1) >> 1 : 0;
+                        const int qp1 = bs1 == 2 ? (get_qPy(s, x + (4 * h), y - 1) + get_qPy(s, x + (4 * h), y) + 1) >> 1 : 0;
+
+                        c_tc[0]   = bs0 == 2 ? chroma_tc(s, qp0, chroma, tc_offset)     : 0;
+                        c_tc[1]   = bs1 == 2 ? chroma_tc(s, qp1, chroma, cur_tc_offset) : 0;
+                        src       = &data[chroma][(y >> s->ps.sps->vshift[chroma]) * linesize[chroma] + ((x >> s->ps.sps->hshift[chroma]) << s->ps.sps->pixel_shift)];
+                        if (pcmf) {
+                            no_p[0] = get_pcm(s, x,           y - 1);
+                            no_p[1] = get_pcm(s, x + (4 * h), y - 1);
+                            no_q[0] = get_pcm(s, x,           y);
+                            no_q[1] = get_pcm(s, x + (4 * h), y);
+                            s->hevcdsp.hevc_h_loop_filter_chroma_c(src, linesize[chroma],
+                                                                   c_tc, no_p, no_q);
+                        } else
+                            s->hevcdsp.hevc_h_loop_filter_chroma(src, linesize[chroma],
+                                                                 c_tc, no_p, no_q);
+                    }
+                }
+            }
+        }
+    }
+}
+
+static int boundary_strength(const HEVCContext *s, const MvField *curr, const MvField *neigh,
+                             const RefPicList *neigh_refPicList)
+{ // returns 0 when curr and neigh use matching references with motion vectors closer than 4 per component, else 1
+    if (curr->pred_flag == PF_BI &&  neigh->pred_flag == PF_BI) {
+        // same L0 and L1: all four references are equal, so either MV pairing may match
+        if (s->cur_frame->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]]  &&
+            s->cur_frame->refPicList[0].list[curr->ref_idx[0]] == s->cur_frame->refPicList[1].list[curr->ref_idx[1]] &&
+            neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) {
+            if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
+                 FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) &&
+                (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
+                 FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4))
+                return 1; // both the straight and the crossed pairing differ
+            else
+                return 0;
+        } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->cur_frame->refPicList[0].list[curr->ref_idx[0]] &&
+                   neigh_refPicList[1].list[neigh->ref_idx[1]] == s->cur_frame->refPicList[1].list[curr->ref_idx[1]]) { // straight pairing
+            if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
+                FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4)
+                return 1;
+            else
+                return 0;
+        } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->cur_frame->refPicList[0].list[curr->ref_idx[0]] &&
+                   neigh_refPicList[0].list[neigh->ref_idx[0]] == s->cur_frame->refPicList[1].list[curr->ref_idx[1]]) { // crossed pairing
+            if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
+                FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)
+                return 1;
+            else
+                return 0;
+        } else { // references do not match in either pairing
+            return 1;
+        }
+    } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV
+        Mv A, B;
+        int ref_A, ref_B;
+
+        if (curr->pred_flag & 1) { // bit 0 set: take the list-0 vector and reference, else list 1
+            A     = curr->mv[0];
+            ref_A = s->cur_frame->refPicList[0].list[curr->ref_idx[0]];
+        } else {
+            A     = curr->mv[1];
+            ref_A = s->cur_frame->refPicList[1].list[curr->ref_idx[1]];
+        }
+
+        if (neigh->pred_flag & 1) { // same selection for the neighbour, using its own reference lists
+            B     = neigh->mv[0];
+            ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]];
+        } else {
+            B     = neigh->mv[1];
+            ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]];
+        }
+
+        if (ref_A == ref_B) {
+            if (FFABS(A.x - B.x) >= 4 || FFABS(A.y - B.y) >= 4)
+                return 1;
+            else
+                return 0;
+        } else
+            return 1;
+    }
+
+    return 1; // one side is bi-predicted and the other is not
+}
+/* Fill s->horizontal_bs / s->vertical_bs for the edges of the transform block at (x0, y0), which spans (1 << log2_trafo_size) samples in each direction. */
+void ff_hevc_deblocking_boundary_strengths(HEVCLocalContext *lc, int x0, int y0,
+                                           int log2_trafo_size)
+{
+    const HEVCContext *s = lc->parent;
+    const MvField *tab_mvf = s->cur_frame->tab_mvf;
+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
+    int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
+    int min_pu_width     = s->ps.sps->min_pu_width;
+    int min_tu_width     = s->ps.sps->min_tb_width;
+    int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width +
+                           (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA; // sampled at the block's top-left position
+    int boundary_upper, boundary_left;
+    int i, j, bs;
+
+    boundary_upper = y0 > 0 && !(y0 & 7); // bs for horizontal TU boundaries: only when y0 is a non-zero multiple of 8
+    if (boundary_upper &&
+        ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_UPPER_SLICE &&
+          (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
+         (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_UPPER_TILE &&
+          (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
+        boundary_upper = 0; // CTB-aligned slice/tile border with filtering across it disabled: leave the edge untouched
+
+    if (boundary_upper) {
+        const RefPicList *rpl_top = (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ?
+                                    ff_hevc_get_ref_list(s, s->cur_frame, x0, y0 - 1) :
+                                    s->cur_frame->refPicList; // ref lists are looked up at (x0, y0 - 1) when BOUNDARY_UPPER_SLICE is set
+        int yp_pu = (y0 - 1) >> log2_min_pu_size;
+        int yq_pu =  y0      >> log2_min_pu_size;
+        int yp_tu = (y0 - 1) >> log2_min_tu_size;
+        int yq_tu =  y0      >> log2_min_tu_size;
+
+            for (i = 0; i < (1 << log2_trafo_size); i += 4) { // one bs value per 4 samples along the edge
+                int x_pu = (x0 + i) >> log2_min_pu_size;
+                int x_tu = (x0 + i) >> log2_min_tu_size;
+                const MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
+                const MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
+                uint8_t top_cbf_luma  = s->cbf_luma[yp_tu * min_tu_width + x_tu];
+                uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu];
+
+                if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA)
+                    bs = 2; // either side is intra
+                else if (curr_cbf_luma || top_cbf_luma)
+                    bs = 1; // either side has cbf_luma set
+                else
+                    bs = boundary_strength(s, curr, top, rpl_top); // otherwise decided from the motion data of both sides
+                s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs;
+            }
+    }
+
+    // bs for vertical TU boundaries
+    boundary_left = x0 > 0 && !(x0 & 7); // same rules as above, applied to the left edge
+    if (boundary_left &&
+        ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_LEFT_SLICE &&
+          (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
+         (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_LEFT_TILE &&
+          (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
+        boundary_left = 0;
+
+    if (boundary_left) {
+        const RefPicList *rpl_left = (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ?
+                                     ff_hevc_get_ref_list(s, s->cur_frame, x0 - 1, y0) :
+                                     s->cur_frame->refPicList;
+        int xp_pu = (x0 - 1) >> log2_min_pu_size;
+        int xq_pu =  x0      >> log2_min_pu_size;
+        int xp_tu = (x0 - 1) >> log2_min_tu_size;
+        int xq_tu =  x0      >> log2_min_tu_size;
+
+            for (i = 0; i < (1 << log2_trafo_size); i += 4) {
+                int y_pu      = (y0 + i) >> log2_min_pu_size;
+                int y_tu      = (y0 + i) >> log2_min_tu_size;
+                const MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
+                const MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
+                uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu];
+                uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu];
+
+                if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA)
+                    bs = 2;
+                else if (curr_cbf_luma || left_cbf_luma)
+                    bs = 1;
+                else
+                    bs = boundary_strength(s, curr, left, rpl_left);
+                s->vertical_bs[(x0 + (y0 + i) * s->bs_width) >> 2] = bs;
+            }
+    }
+
+    if (log2_trafo_size > log2_min_pu_size && !is_intra) { // edges inside the block use motion data only (no intra / cbf test)
+        const RefPicList *rpl = s->cur_frame->refPicList;
+
+        // bs for TU internal horizontal PU boundaries
+        for (j = 8; j < (1 << log2_trafo_size); j += 8) { // rows at offsets 8, 16, ... inside the block
+            int yp_pu = (y0 + j - 1) >> log2_min_pu_size;
+            int yq_pu = (y0 + j)     >> log2_min_pu_size;
+
+            for (i = 0; i < (1 << log2_trafo_size); i += 4) {
+                int x_pu = (x0 + i) >> log2_min_pu_size;
+                const MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
+                const MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
+
+                bs = boundary_strength(s, curr, top, rpl);
+                s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
+            }
+        }
+
+        // bs for TU internal vertical PU boundaries
+        for (j = 0; j < (1 << log2_trafo_size); j += 4) {
+            int y_pu = (y0 + j) >> log2_min_pu_size;
+
+            for (i = 8; i < (1 << log2_trafo_size); i += 8) { // columns at offsets 8, 16, ... inside the block
+                int xp_pu = (x0 + i - 1) >> log2_min_pu_size;
+                int xq_pu = (x0 + i)     >> log2_min_pu_size;
+                const MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
+                const MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
+
+                bs = boundary_strength(s, curr, left, rpl);
+                s->vertical_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
+            }
+        }
+    }
+}
+
+#undef LUMA
+#undef CB
+#undef CR
+/* Deblock the CTB at (x, y), run SAO on the neighbouring CTBs selected below, and report frame-thread progress. */
+void ff_hevc_hls_filter(HEVCLocalContext *lc, int x, int y, int ctb_size)
+{
+    const HEVCContext *const s = lc->parent;
+    const int discard      = s->avctx->skip_loop_filter;
+    const int frame_thread = s->threads_type & FF_THREAD_FRAME;
+    const int last_col     = x >= s->ps.sps->width  - ctb_size;
+    const int last_row     = y >= s->ps.sps->height - ctb_size;
+    const int skip = // both in-loop filters are bypassed according to the skip_loop_filter level
+        discard >= AVDISCARD_ALL                                                    ||
+        (discard >= AVDISCARD_NONKEY   && !IS_IDR(s))                               ||
+        (discard >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I)         ||
+        (discard >= AVDISCARD_BIDIR    && s->sh.slice_type == HEVC_SLICE_B)         ||
+        (discard >= AVDISCARD_NONREF   && ff_hevc_nal_is_nonref(s->nal_unit_type));
+    if (skip || !s->ps.sps->sao_enabled) { // no SAO pass: at most deblock, then report once per CTB row
+        if (!skip)
+            deblocking_filter_CTB(s, x, y);
+        if (frame_thread && last_col)
+            ff_progress_frame_report(&s->cur_frame->tf, y + ctb_size - 4);
+        return;
+    }
+    deblocking_filter_CTB(s, x, y);
+    if (x && y) // SAO on the up-left neighbour
+        sao_filter_CTB(lc, s, x - ctb_size, y - ctb_size);
+    if (x && last_row) // bottom row: also the left neighbour
+        sao_filter_CTB(lc, s, x - ctb_size, y);
+    if (y && last_col) { // right column: also the upper neighbour
+        sao_filter_CTB(lc, s, x, y - ctb_size);
+        if (frame_thread)
+            ff_progress_frame_report(&s->cur_frame->tf, y);
+    }
+    if (last_col && last_row) { // bottom-right CTB: this CTB itself
+        sao_filter_CTB(lc, s, x, y);
+        if (frame_thread)
+            ff_progress_frame_report(&s->cur_frame->tf, y + ctb_size);
+    }
+}
+/* Call ff_hevc_hls_filter() on the neighbours of the CTB at (x_ctb, y_ctb) selected by the conditions below. */
+void ff_hevc_hls_filters(HEVCLocalContext *lc, int x_ctb, int y_ctb, int ctb_size)
+{
+    const int last_col = x_ctb >= lc->parent->ps.sps->width  - ctb_size;
+    const int last_row = y_ctb >= lc->parent->ps.sps->height - ctb_size;
+    if (x_ctb && y_ctb)   // up-left neighbour
+        ff_hevc_hls_filter(lc, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size);
+    if (last_col && y_ctb) // right picture edge: upper neighbour
+        ff_hevc_hls_filter(lc, x_ctb, y_ctb - ctb_size, ctb_size);
+    if (last_row && x_ctb) // bottom picture edge: left neighbour
+        ff_hevc_hls_filter(lc, x_ctb - ctb_size, y_ctb, ctb_size);
+}
diff --git a/libavcodec/hevc/hevc.h b/libavcodec/hevc/hevc.h
new file mode 100644
index 0000000000..9fdbc0a224
--- /dev/null
+++ b/libavcodec/hevc/hevc.h
@@ -0,0 +1,163 @@
+/*
+ * HEVC shared code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVC_HEVC_H
+#define AVCODEC_HEVC_HEVC_H
+
+/**
+ * Table 7-1 – NAL unit type codes and NAL unit type classes in
+ * T-REC-H.265-201802. Every code from 0 to 63 is listed with its explicit value.
+ */
+enum HEVCNALUnitType {
+    HEVC_NAL_TRAIL_N        = 0,
+    HEVC_NAL_TRAIL_R        = 1,
+    HEVC_NAL_TSA_N          = 2,
+    HEVC_NAL_TSA_R          = 3,
+    HEVC_NAL_STSA_N         = 4,
+    HEVC_NAL_STSA_R         = 5,
+    HEVC_NAL_RADL_N         = 6,
+    HEVC_NAL_RADL_R         = 7,
+    HEVC_NAL_RASL_N         = 8,
+    HEVC_NAL_RASL_R         = 9,
+    HEVC_NAL_VCL_N10        = 10,
+    HEVC_NAL_VCL_R11        = 11,
+    HEVC_NAL_VCL_N12        = 12,
+    HEVC_NAL_VCL_R13        = 13,
+    HEVC_NAL_VCL_N14        = 14,
+    HEVC_NAL_VCL_R15        = 15,
+    HEVC_NAL_BLA_W_LP       = 16,
+    HEVC_NAL_BLA_W_RADL     = 17,
+    HEVC_NAL_BLA_N_LP       = 18,
+    HEVC_NAL_IDR_W_RADL     = 19,
+    HEVC_NAL_IDR_N_LP       = 20,
+    HEVC_NAL_CRA_NUT        = 21,
+    HEVC_NAL_RSV_IRAP_VCL22 = 22,
+    HEVC_NAL_RSV_IRAP_VCL23 = 23,
+    HEVC_NAL_RSV_VCL24      = 24,
+    HEVC_NAL_RSV_VCL25      = 25,
+    HEVC_NAL_RSV_VCL26      = 26,
+    HEVC_NAL_RSV_VCL27      = 27,
+    HEVC_NAL_RSV_VCL28      = 28,
+    HEVC_NAL_RSV_VCL29      = 29,
+    HEVC_NAL_RSV_VCL30      = 30,
+    HEVC_NAL_RSV_VCL31      = 31,
+    HEVC_NAL_VPS            = 32,
+    HEVC_NAL_SPS            = 33,
+    HEVC_NAL_PPS            = 34,
+    HEVC_NAL_AUD            = 35,
+    HEVC_NAL_EOS_NUT        = 36,
+    HEVC_NAL_EOB_NUT        = 37,
+    HEVC_NAL_FD_NUT         = 38,
+    HEVC_NAL_SEI_PREFIX     = 39,
+    HEVC_NAL_SEI_SUFFIX     = 40,
+    HEVC_NAL_RSV_NVCL41     = 41,
+    HEVC_NAL_RSV_NVCL42     = 42,
+    HEVC_NAL_RSV_NVCL43     = 43,
+    HEVC_NAL_RSV_NVCL44     = 44,
+    HEVC_NAL_RSV_NVCL45     = 45,
+    HEVC_NAL_RSV_NVCL46     = 46,
+    HEVC_NAL_RSV_NVCL47     = 47,
+    HEVC_NAL_UNSPEC48       = 48,
+    HEVC_NAL_UNSPEC49       = 49,
+    HEVC_NAL_UNSPEC50       = 50,
+    HEVC_NAL_UNSPEC51       = 51,
+    HEVC_NAL_UNSPEC52       = 52,
+    HEVC_NAL_UNSPEC53       = 53,
+    HEVC_NAL_UNSPEC54       = 54,
+    HEVC_NAL_UNSPEC55       = 55,
+    HEVC_NAL_UNSPEC56       = 56,
+    HEVC_NAL_UNSPEC57       = 57,
+    HEVC_NAL_UNSPEC58       = 58,
+    HEVC_NAL_UNSPEC59       = 59,
+    HEVC_NAL_UNSPEC60       = 60,
+    HEVC_NAL_UNSPEC61       = 61,
+    HEVC_NAL_UNSPEC62       = 62,
+    HEVC_NAL_UNSPEC63       = 63,
+};
+/* Slice types; the enumerators carry explicit codes 0 (B), 1 (P) and 2 (I). */
+enum HEVCSliceType {
+    HEVC_SLICE_B = 0,
+    HEVC_SLICE_P = 1,
+    HEVC_SLICE_I = 2,
+};
+/* Upper bounds for HEVC syntax elements and derived quantities; each constant is preceded by the spec clause it is taken from. */
+enum {
+    // 7.4.3.1: vps_max_layers_minus1 is in [0, 62].
+    HEVC_MAX_LAYERS     = 63,
+    // 7.4.3.1: vps_max_sub_layers_minus1 is in [0, 6].
+    HEVC_MAX_SUB_LAYERS = 7,
+    // 7.4.3.1: vps_num_layer_sets_minus1 is in [0, 1023].
+    HEVC_MAX_LAYER_SETS = 1024,
+
+    // 7.4.2.1: vps_video_parameter_set_id is u(4).
+    HEVC_MAX_VPS_COUNT = 16,
+    // 7.4.3.2.1: sps_seq_parameter_set_id is in [0, 15].
+    HEVC_MAX_SPS_COUNT = 16,
+    // 7.4.3.3.1: pps_pic_parameter_set_id is in [0, 63].
+    HEVC_MAX_PPS_COUNT = 64,
+
+    // A.4.2: MaxDpbSize is bounded above by 16.
+    HEVC_MAX_DPB_SIZE = 16,
+    // 7.4.3.1: vps_max_dec_pic_buffering_minus1[i] is in [0, MaxDpbSize - 1].
+    HEVC_MAX_REFS     = HEVC_MAX_DPB_SIZE, // = 16
+
+    // 7.4.3.2.1: num_short_term_ref_pic_sets is in [0, 64].
+    HEVC_MAX_SHORT_TERM_REF_PIC_SETS = 64,
+    // 7.4.3.2.1: num_long_term_ref_pics_sps is in [0, 32].
+    HEVC_MAX_LONG_TERM_REF_PICS      = 32,
+
+    // A.3: all profiles require that CtbLog2SizeY is in [4, 6].
+    HEVC_MIN_LOG2_CTB_SIZE = 4,
+    HEVC_MAX_LOG2_CTB_SIZE = 6,
+
+    // E.3.2: cpb_cnt_minus1[i] is in [0, 31].
+    HEVC_MAX_CPB_CNT = 32,
+
+    // A.4.1: in table A.6 the highest level allows a MaxLumaPs of 35 651 584.
+    HEVC_MAX_LUMA_PS = 35651584,
+    // A.4.1: pic_width_in_luma_samples and pic_height_in_luma_samples are
+    // constrained to be not greater than sqrt(MaxLumaPs * 8).  Hence height/
+    // width are bounded above by sqrt(8 * 35651584) = 16888.2 samples.
+    HEVC_MAX_WIDTH  = 16888,
+    HEVC_MAX_HEIGHT = 16888,
+
+    // A.4.1: table A.6 allows at most 22 tile rows for any level.
+    HEVC_MAX_TILE_ROWS    = 22,
+    // A.4.1: table A.6 allows at most 20 tile columns for any level.
+    HEVC_MAX_TILE_COLUMNS = 20,
+
+    // A.4.2: table A.6 allows at most 600 slice segments for any level.
+    HEVC_MAX_SLICE_SEGMENTS = 600,
+
+    // 7.4.7.1: in the worst case (tiles_enabled_flag and
+    // entropy_coding_sync_enabled_flag are both set), entry points can be
+    // placed at the beginning of every Ctb row in every tile, giving an
+    // upper bound of (num_tile_columns_minus1 + 1) * PicHeightInCtbsY - 1.
+    // Only a stream with very high resolution and perverse parameters could
+    // get near that, though, so set a lower limit here with the maximum
+    // possible value for 4K video (at most 135 16x16 Ctb rows).
+    HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135, // = 20 * 135 = 2700
+
+    // A.3.7: Screen content coding extensions
+    HEVC_MAX_PALETTE_PREDICTOR_SIZE = 128,
+};
+
+
+#endif /* AVCODEC_HEVC_HEVC_H */
diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
new file mode 100644
index 0000000000..4a07fa6612
--- /dev/null
+++ b/libavcodec/hevc/hevcdec.c
@@ -0,0 +1,3754 @@
+/*
+ * HEVC video Decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2012 - 2013 Mickael Raulet
+ * Copyright (C) 2012 - 2013 Gildas Cocherel
+ * Copyright (C) 2012 - 2013 Wassim Hamidouche
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/film_grain_params.h"
+#include "libavutil/internal.h"
+#include "libavutil/md5.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/timecode.h"
+
+#include "aom_film_grain.h"
+#include "bswapdsp.h"
+#include "cabac_functions.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "golomb.h"
+#include "hevc.h"
+#include "parse.h"
+#include "hevcdec.h"
+#include "hwaccel_internal.h"
+#include "hwconfig.h"
+#include "internal.h"
+#include "profiles.h"
+#include "progressframe.h"
+#include "refstruct.h"
+#include "thread.h"
+/* Sparse lookup table: indices 2, 4, 6, 8, 12, 16, 24, 32, 48, 64 map to 0..9, every other index is implicitly 0 (presumably indexed by block width - confirm at the use sites). */
+static const uint8_t hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
+
+/**
+ * NOTE: Each function hls_foo corresponds to the function foo in the
+ * specification (HLS stands for High Level Syntax).
+ */
+
+/**
+ * Section 5.7
+ */
+
+/* Release every table and pool set up by pic_arrays_init(), plus the slice header's entry point arrays. */
+static void pic_arrays_free(HEVCContext *s)
+{
+    /* tables with one entry per CTB */
+    av_freep(&s->sao);
+    av_freep(&s->deblock);
+    av_freep(&s->filter_slice_edges);
+    /* per-block side information */
+    av_freep(&s->skip_flag);
+    av_freep(&s->tab_ct_depth);
+    av_freep(&s->tab_ipm);
+    av_freep(&s->cbf_luma);
+    av_freep(&s->is_pcm);
+    av_freep(&s->qp_y_tab);
+    av_freep(&s->tab_slice_address);
+    /* deblocking boundary strengths */
+    av_freep(&s->horizontal_bs);
+    av_freep(&s->vertical_bs);
+    /* slice header entry point arrays */
+    av_freep(&s->sh.entry_point_offset);
+    av_freep(&s->sh.size);
+    av_freep(&s->sh.offset);
+
+    /* buffer pools */
+    ff_refstruct_pool_uninit(&s->tab_mvf_pool);
+    ff_refstruct_pool_uninit(&s->rpl_tab_pool);
+}
+
+/* allocate arrays that depend on frame dimensions; returns 0, or AVERROR(ENOMEM) after releasing any partial allocations */
+static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
+{
+    int log2_min_cb_size = sps->log2_min_cb_size;
+    int width            = sps->width;
+    int height           = sps->height;
+    int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
+                           ((height >> log2_min_cb_size) + 1); // NOTE: despite the name, counted in minimum-CB units, plus one in each dimension
+    int ctb_count        = sps->ctb_width * sps->ctb_height;
+    int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
+
+    s->bs_width  = (width  >> 2) + 1; // boundary-strength grid: one column per 4 samples, plus one
+    s->bs_height = (height >> 2) + 1;
+
+    s->sao           = av_calloc(ctb_count, sizeof(*s->sao));
+    s->deblock       = av_calloc(ctb_count, sizeof(*s->deblock));
+    if (!s->sao || !s->deblock)
+        goto fail;
+
+    s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
+    s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
+    if (!s->skip_flag || !s->tab_ct_depth)
+        goto fail;
+
+    s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
+    s->tab_ipm  = av_mallocz(min_pu_size);
+    s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
+    if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
+        goto fail;
+
+    s->filter_slice_edges = av_mallocz(ctb_count);
+    s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
+                                      sizeof(*s->tab_slice_address));
+    s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
+                                      sizeof(*s->qp_y_tab));
+    if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
+        goto fail;
+
+    s->horizontal_bs = av_calloc(s->bs_width, s->bs_height);
+    s->vertical_bs   = av_calloc(s->bs_width, s->bs_height);
+    if (!s->horizontal_bs || !s->vertical_bs)
+        goto fail;
+
+    s->tab_mvf_pool = ff_refstruct_pool_alloc(min_pu_size * sizeof(MvField), 0); // one MvField per minimum PU
+    s->rpl_tab_pool = ff_refstruct_pool_alloc(ctb_count * sizeof(RefPicListTab), 0); // one RefPicListTab per CTB
+    if (!s->tab_mvf_pool || !s->rpl_tab_pool)
+        goto fail;
+
+    return 0;
+
+fail:
+    pic_arrays_free(s); // releases whatever was allocated before the failure
+    return AVERROR(ENOMEM);
+}
+/* Parse the weighted-prediction table from gb into s->sh (luma/chroma weights and offsets for L0, and for L1 in B slices); returns 0 or AVERROR_INVALIDDATA. */
+static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
+{
+    int i = 0;
+    int j = 0;
+    uint8_t luma_weight_l0_flag[16]; // NOTE(review): indexed up to nb_refs[L0] - 1 with no check here; assumes nb_refs[] <= 16 - confirm at the caller
+    uint8_t chroma_weight_l0_flag[16];
+    uint8_t luma_weight_l1_flag[16];
+    uint8_t chroma_weight_l1_flag[16];
+    int luma_log2_weight_denom;
+
+    luma_log2_weight_denom = get_ue_golomb_long(gb);
+    if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
+        av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
+        return AVERROR_INVALIDDATA;
+    }
+    s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3); // value was already range-checked to [0, 7] above
+    if (s->ps.sps->chroma_format_idc != 0) {
+        int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb); // coded as a delta against the luma denominator
+        if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
+            av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
+            return AVERROR_INVALIDDATA;
+        }
+        s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
+    }
+
+    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
+        luma_weight_l0_flag[i] = get_bits1(gb);
+        if (!luma_weight_l0_flag[i]) {
+            s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom; // flag clear: weight 1 << denom, zero offset
+            s->sh.luma_offset_l0[i] = 0;
+        }
+    }
+    if (s->ps.sps->chroma_format_idc != 0) {
+        for (i = 0; i < s->sh.nb_refs[L0]; i++)
+            chroma_weight_l0_flag[i] = get_bits1(gb);
+    } else {
+        for (i = 0; i < s->sh.nb_refs[L0]; i++)
+            chroma_weight_l0_flag[i] = 0; // chroma_format_idc == 0: no chroma flags are read
+    }
+    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
+        if (luma_weight_l0_flag[i]) {
+            int delta_luma_weight_l0 = get_se_golomb(gb);
+            if ((int8_t)delta_luma_weight_l0 != delta_luma_weight_l0) // reject deltas that do not fit in int8_t
+                return AVERROR_INVALIDDATA;
+            s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
+            s->sh.luma_offset_l0[i] = get_se_golomb(gb);
+        }
+        if (chroma_weight_l0_flag[i]) {
+            for (j = 0; j < 2; j++) { // two chroma components per reference
+                int delta_chroma_weight_l0 = get_se_golomb(gb);
+                int delta_chroma_offset_l0 = get_se_golomb(gb);
+
+                if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
+                    || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
+                    return AVERROR_INVALIDDATA;
+                }
+
+                s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
+                s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
+                                                                                    >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
+            }
+        } else {
+            s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
+            s->sh.chroma_offset_l0[i][0] = 0;
+            s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
+            s->sh.chroma_offset_l0[i][1] = 0;
+        }
+    }
+    if (s->sh.slice_type == HEVC_SLICE_B) { // the L1 table is parsed for B slices only, mirroring the L0 steps above
+        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
+            luma_weight_l1_flag[i] = get_bits1(gb);
+            if (!luma_weight_l1_flag[i]) {
+                s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
+                s->sh.luma_offset_l1[i] = 0;
+            }
+        }
+        if (s->ps.sps->chroma_format_idc != 0) {
+            for (i = 0; i < s->sh.nb_refs[L1]; i++)
+                chroma_weight_l1_flag[i] = get_bits1(gb);
+        } else {
+            for (i = 0; i < s->sh.nb_refs[L1]; i++)
+                chroma_weight_l1_flag[i] = 0;
+        }
+        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
+            if (luma_weight_l1_flag[i]) {
+                int delta_luma_weight_l1 = get_se_golomb(gb);
+                if ((int8_t)delta_luma_weight_l1 != delta_luma_weight_l1)
+                    return AVERROR_INVALIDDATA;
+                s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
+                s->sh.luma_offset_l1[i] = get_se_golomb(gb);
+            }
+            if (chroma_weight_l1_flag[i]) {
+                for (j = 0; j < 2; j++) {
+                    int delta_chroma_weight_l1 = get_se_golomb(gb);
+                    int delta_chroma_offset_l1 = get_se_golomb(gb);
+
+                    if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
+                        || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
+                        return AVERROR_INVALIDDATA;
+                    }
+
+                    s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
+                    s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
+                                                                                        >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
+                }
+            } else {
+                s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
+                s->sh.chroma_offset_l1[i][0] = 0;
+                s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
+                s->sh.chroma_offset_l1[i][1] = 0;
+            }
+        }
+    }
+    return 0;
+}
+/* Parse the slice header's long-term reference picture set from gb into *rps; returns 0 on success or AVERROR_INVALIDDATA on out-of-range counts or POC values. */
+static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
+{
+    const HEVCSPS *sps = s->ps.sps;
+    int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
+    int prev_delta_msb = 0;
+    unsigned int nb_sps = 0, nb_sh; // entries selected from the SPS candidates / entries coded explicitly in the slice header
+    int i;
+
+    rps->nb_refs = 0;
+    if (!sps->long_term_ref_pics_present)
+        return 0;
+
+    if (sps->num_long_term_ref_pics_sps > 0)
+        nb_sps = get_ue_golomb_long(gb);
+    nb_sh = get_ue_golomb_long(gb);
+
+    if (nb_sps > sps->num_long_term_ref_pics_sps)
+        return AVERROR_INVALIDDATA;
+    if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc)) // 64-bit sum so the bound check cannot wrap
+        return AVERROR_INVALIDDATA;
+
+    rps->nb_refs = nb_sh + nb_sps;
+
+    for (i = 0; i < rps->nb_refs; i++) {
+
+        if (i < nb_sps) { // the first nb_sps entries refer to SPS candidates by index
+            uint8_t lt_idx_sps = 0;
+
+            if (sps->num_long_term_ref_pics_sps > 1)
+                lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
+
+            rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
+            rps->used[i] = !!(sps->used_by_curr_pic_lt & (1U << lt_idx_sps)); // 1U: lt_idx_sps can reach 31, and 1 << 31 is undefined for int
+        } else {
+            rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
+            rps->used[i] = get_bits1(gb);
+        }
+
+        rps->poc_msb_present[i] = get_bits1(gb);
+        if (rps->poc_msb_present[i]) {
+            int64_t delta = get_ue_golomb_long(gb);
+            int64_t poc;
+
+            if (i && i != nb_sps) // the MSB delta accumulates within each group, restarting at the first slice-coded entry
+                delta += prev_delta_msb;
+
+            poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
+            if (poc != (int32_t)poc) // the full POC must fit in 32 bits
+                return AVERROR_INVALIDDATA;
+            rps->poc[i] = poc;
+            prev_delta_msb = delta;
+        }
+    }
+
+    return 0;
+}
+/* Export the stream-level parameters carried by the SPS (and by the VPS it
+ * references) to the AVCodecContext. */
+static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
+{
+    AVCodecContext *const avctx   = s->avctx;
+    const HEVCVPS *const vps      = s->ps.vps_list[sps->vps_id];
+    const HEVCWindow *const crop  = &sps->output_window;
+    const int top_layer           = sps->max_sub_layers - 1;
+    const int has_colour          = sps->vui.common.colour_description_present_flag;
+    unsigned int ticks = 0, scale = 0;
+
+    /* pixel format, geometry, reordering depth, profile and level */
+    avctx->pix_fmt      = sps->pix_fmt;
+    avctx->coded_width  = sps->width;
+    avctx->coded_height = sps->height;
+    avctx->width        = sps->width  - crop->left_offset - crop->right_offset;
+    avctx->height       = sps->height - crop->top_offset  - crop->bottom_offset;
+    avctx->has_b_frames = sps->temporal_layer[top_layer].num_reorder_pics;
+    avctx->profile      = sps->ptl.general_ptl.profile_idc;
+    avctx->level        = sps->ptl.general_ptl.level_idc;
+
+    /* sample aspect ratio from the VUI */
+    ff_set_sar(avctx, sps->vui.common.sar);
+
+    /* MPEG range unless the VUI explicitly signals full range */
+    avctx->color_range = (sps->vui.common.video_signal_type_present_flag &&
+                          sps->vui.common.video_full_range_flag) ? AVCOL_RANGE_JPEG
+                                                                  : AVCOL_RANGE_MPEG;
+
+    /* colour description stays unspecified when the VUI carries none */
+    avctx->color_primaries = has_colour ? sps->vui.common.colour_primaries         : AVCOL_PRI_UNSPECIFIED;
+    avctx->color_trc       = has_colour ? sps->vui.common.transfer_characteristics : AVCOL_TRC_UNSPECIFIED;
+    avctx->colorspace      = has_colour ? sps->vui.common.matrix_coeffs            : AVCOL_SPC_UNSPECIFIED;
+
+    /* chroma location is only exported for chroma_format_idc == 1 */
+    avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
+    if (sps->chroma_format_idc == 1) {
+        if (!sps->vui.common.chroma_loc_info_present_flag)
+            avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
+        else if (sps->vui.common.chroma_sample_loc_type_top_field <= 5)
+            avctx->chroma_sample_location = sps->vui.common.chroma_sample_loc_type_top_field + 1;
+    }
+
+    /* timing information: the VPS takes precedence over the SPS VUI */
+    if (vps->vps_timing_info_present_flag) {
+        ticks = vps->vps_num_units_in_tick;
+        scale = vps->vps_time_scale;
+    } else if (sps->vui.vui_timing_info_present_flag) {
+        ticks = sps->vui.vui_num_units_in_tick;
+        scale = sps->vui.vui_time_scale;
+    }
+
+    /* ticks / scale is reduced into den / num, so framerate = scale / ticks */
+    if (ticks && scale)
+        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
+                  ticks, scale, 1 << 30);
+}
+/* Update avctx->properties and avctx->color_trc from the SEI state held in s->sei; always returns 0. */
+static int export_stream_params_from_sei(HEVCContext *s)
+{
+    AVCodecContext *const avctx = s->avctx;
+    const int trc       = s->sei.common.alternative_transfer.preferred_transfer_characteristics;
+    const int has_grain = s->sei.common.film_grain_characteristics.present ||
+                          s->sei.common.aom_film_grain.enable;
+
+    if (s->sei.common.a53_caption.buf_ref)
+        avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+    if (has_grain)
+        avctx->properties |= FF_CODEC_PROPERTY_FILM_GRAIN;
+
+    /* the preferred transfer overrides color_trc only if it has a name and is not "unspecified" */
+    if (s->sei.common.alternative_transfer.present &&
+        av_color_transfer_name(trc) && trc != AVCOL_TRC_UNSPECIFIED)
+        avctx->color_trc = trc;
+
+    return 0;
+}
+/* Build the candidate pixel-format list for sps->pix_fmt (enabled hwaccel formats first, then the software format) and return the result of ff_get_format(). */
+static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
+{
+#define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
+                     CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
+                     CONFIG_HEVC_D3D12VA_HWACCEL + \
+                     CONFIG_HEVC_NVDEC_HWACCEL + \
+                     CONFIG_HEVC_VAAPI_HWACCEL + \
+                     CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
+                     CONFIG_HEVC_VDPAU_HWACCEL + \
+                     CONFIG_HEVC_VULKAN_HWACCEL)
+    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; // + 2: the software format and the AV_PIX_FMT_NONE terminator; D3D11VA counts twice as it adds two formats
+
+    switch (sps->pix_fmt) { // formats without a case get no hwaccel candidates
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_YUVJ420P:
+#if CONFIG_HEVC_DXVA2_HWACCEL
+        *fmt++ = AV_PIX_FMT_DXVA2_VLD;
+#endif
+#if CONFIG_HEVC_D3D11VA_HWACCEL
+        *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
+        *fmt++ = AV_PIX_FMT_D3D11;
+#endif
+#if CONFIG_HEVC_D3D12VA_HWACCEL
+        *fmt++ = AV_PIX_FMT_D3D12;
+#endif
+#if CONFIG_HEVC_VAAPI_HWACCEL
+        *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+        *fmt++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
+#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+        *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+        break;
+    case AV_PIX_FMT_YUV420P10:
+#if CONFIG_HEVC_DXVA2_HWACCEL
+        *fmt++ = AV_PIX_FMT_DXVA2_VLD;
+#endif
+#if CONFIG_HEVC_D3D11VA_HWACCEL
+        *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
+        *fmt++ = AV_PIX_FMT_D3D11;
+#endif
+#if CONFIG_HEVC_D3D12VA_HWACCEL
+        *fmt++ = AV_PIX_FMT_D3D12;
+#endif
+#if CONFIG_HEVC_VAAPI_HWACCEL
+        *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+        *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+        *fmt++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
+        break;
+    case AV_PIX_FMT_YUV444P:
+#if CONFIG_HEVC_VAAPI_HWACCEL
+        *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+        *fmt++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
+#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+        *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+        break;
+    case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV422P10LE:
+#if CONFIG_HEVC_VAAPI_HWACCEL
+       *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+        *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+        break;
+    case AV_PIX_FMT_YUV444P10:
+#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+        *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+    /* NOTE: fallthrough */
+    case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV444P12:
+#if CONFIG_HEVC_VAAPI_HWACCEL
+       *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+        *fmt++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
+#endif
+        break;
+    case AV_PIX_FMT_YUV422P12:
+#if CONFIG_HEVC_VAAPI_HWACCEL
+       *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+        break;
+    }
+
+    *fmt++ = sps->pix_fmt; // the software format always comes after any hwaccel formats
+    *fmt = AV_PIX_FMT_NONE; // list terminator
+
+    return ff_get_format(s->avctx, pix_fmts);
+}
+
+/**
+ * Make sps the active SPS and (re)allocate the state that depends on it.
+ *
+ * The per-picture arrays are always freed first and the active SPS/VPS
+ * pointers cleared. With a non-NULL sps the arrays are allocated again, the
+ * stream parameters exported, the prediction/DSP function tables initialised
+ * for sps->bit_depth and, when SAO is enabled and no hwaccel is in use, the
+ * SAO line buffers allocated.
+ *
+ * @param s       decoder context
+ * @param sps     SPS to activate, or NULL to only release the current state
+ * @param pix_fmt value stored in s->avctx->pix_fmt
+ * @return 0 on success, a negative AVERROR code on failure; on failure
+ *         s->ps.sps is NULL and the arrays and SAO buffers are freed
+ */
+static int set_sps(HEVCContext *s, const HEVCSPS *sps,
+                   enum AVPixelFormat pix_fmt)
+{
+    int ret, i;
+
+    pic_arrays_free(s);
+    s->ps.sps = NULL;
+    s->ps.vps = NULL;
+
+    if (!sps)
+        return 0;
+
+    ret = pic_arrays_init(s, sps);
+    if (ret < 0)
+        goto fail;
+
+    export_stream_params(s, sps);
+
+    s->avctx->pix_fmt = pix_fmt;
+
+    ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
+    ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
+    ff_videodsp_init (&s->vdsp,    sps->bit_depth);
+
+    for (i = 0; i < 3; i++) {
+        av_freep(&s->sao_pixel_buffer_h[i]);
+        av_freep(&s->sao_pixel_buffer_v[i]);
+    }
+
+    if (sps->sao_enabled && !s->avctx->hwaccel) {
+        int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
+        int c_idx;
+
+        for(c_idx = 0; c_idx < c_count; c_idx++) {
+            int w = sps->width >> sps->hshift[c_idx];
+            int h = sps->height >> sps->vshift[c_idx];
+            s->sao_pixel_buffer_h[c_idx] =
+                av_malloc((w * 2 * sps->ctb_height) <<
+                          sps->pixel_shift);
+            s->sao_pixel_buffer_v[c_idx] =
+                av_malloc((h * 2 * sps->ctb_width) <<
+                          sps->pixel_shift);
+            if (!s->sao_pixel_buffer_h[c_idx] ||
+                !s->sao_pixel_buffer_v[c_idx]) {
+                /* ret still holds the non-negative result of
+                 * pic_arrays_init(); without this the caller would see
+                 * success while s->ps.sps has been reset to NULL */
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
+    }
+
+    s->ps.sps = sps;
+    s->ps.vps = s->ps.vps_list[s->ps.sps->vps_id];
+
+    return 0;
+
+fail:
+    pic_arrays_free(s);
+    for (i = 0; i < 3; i++) {
+        av_freep(&s->sao_pixel_buffer_h[i]);
+        av_freep(&s->sao_pixel_buffer_v[i]);
+    }
+    s->ps.sps = NULL;
+    return ret;
+}
+/**
+ * Parse one slice segment header from gb into s->sh.
+ *
+ * Besides filling the SliceHeader, this activates the PPS named by the slice
+ * (and, through set_sps() and get_format(), the SPS that PPS refers to when
+ * it is not the active one) and updates s->poc, s->pocTid0, s->slice_idx,
+ * s->slice_initialized, s->seq_decode, s->max_ra and parts of
+ * s->local_ctx[0].
+ *
+ * @param s  decoder context
+ * @param gb bit reader the header is read from
+ * @return 0 on success, 1 when a slice claims to be the first one while
+ *         s->cur_frame is already set, a negative AVERROR code on error
+ */
+static int hls_slice_header(HEVCContext *s, GetBitContext *gb)
+{
+    SliceHeader *sh   = &s->sh;
+    int i, ret;
+
+    // Coded parameters
+    sh->first_slice_in_pic_flag = get_bits1(gb);
+    if (s->cur_frame && sh->first_slice_in_pic_flag) {
+        av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
+        return 1; // This slice will be skipped later, do not corrupt state
+    }
+    /* The first slice of an IDR or BLA picture bumps the sequence counter
+     * and resets max_ra; an IDR additionally clears the references. */
+    if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
+        s->seq_decode = (s->seq_decode + 1) & HEVC_SEQUENCE_COUNTER_MASK;
+        s->max_ra     = INT_MAX;
+        if (IS_IDR(s))
+            ff_hevc_clear_refs(s);
+    }
+    sh->no_output_of_prior_pics_flag = 0;
+    if (IS_IRAP(s))
+        sh->no_output_of_prior_pics_flag = get_bits1(gb);
+    /* The slice must name a PPS that is present in pps_list, and every
+     * slice after the first must use the PPS that is already active. */
+    sh->pps_id = get_ue_golomb_long(gb);
+    if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
+        av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
+        return AVERROR_INVALIDDATA;
+    }
+    if (!sh->first_slice_in_pic_flag &&
+        s->ps.pps != s->ps.pps_list[sh->pps_id]) {
+        av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    s->ps.pps = s->ps.pps_list[sh->pps_id];
+    if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
+        sh->no_output_of_prior_pics_flag = 1;
+    /* The PPS refers to an SPS other than the active one: drop all
+     * references, activate that SPS and renegotiate the pixel format. */
+    if (s->ps.sps != s->ps.sps_list[s->ps.pps->sps_id]) {
+        const HEVCSPS *sps = s->ps.sps_list[s->ps.pps->sps_id];
+        enum AVPixelFormat pix_fmt;
+
+        ff_hevc_clear_refs(s);
+
+        ret = set_sps(s, sps, sps->pix_fmt);
+        if (ret < 0)
+            return ret;
+
+        pix_fmt = get_format(s, sps);
+        if (pix_fmt < 0)
+            return pix_fmt;
+        s->avctx->pix_fmt = pix_fmt;
+
+        s->seq_decode = (s->seq_decode + 1) & HEVC_SEQUENCE_COUNTER_MASK;
+        s->max_ra     = INT_MAX;
+    }
+
+    ret = export_stream_params_from_sei(s);
+    if (ret < 0)
+        return ret;
+    /* The dependent-segment flag and the segment address are only coded for
+     * slices that are not the first one; the address is read with
+     * ceil(log2(ctb_width * ctb_height)) bits. */
+    sh->dependent_slice_segment_flag = 0;
+    if (!sh->first_slice_in_pic_flag) {
+        int slice_address_length;
+
+        if (s->ps.pps->dependent_slice_segments_enabled_flag)
+            sh->dependent_slice_segment_flag = get_bits1(gb);
+
+        slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
+                                            s->ps.sps->ctb_height);
+        sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
+        if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Invalid slice segment address: %u.\n",
+                   sh->slice_segment_addr);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (!sh->dependent_slice_segment_flag) {
+            sh->slice_addr = sh->slice_segment_addr;
+            s->slice_idx++;
+        }
+    } else {
+        sh->slice_segment_addr = sh->slice_addr = 0;
+        s->slice_idx           = 0;
+        s->slice_initialized   = 0;
+    }
+    /* The fields below are parsed for independent slice segments only; for
+     * a dependent segment *sh is left as it is, which is why one is
+     * rejected further down unless slice_initialized is set. */
+    if (!sh->dependent_slice_segment_flag) {
+        s->slice_initialized = 0;
+
+        for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
+            skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
+
+        sh->slice_type = get_ue_golomb_long(gb);
+        if (!(sh->slice_type == HEVC_SLICE_I ||
+              sh->slice_type == HEVC_SLICE_P ||
+              sh->slice_type == HEVC_SLICE_B)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
+                   sh->slice_type);
+            return AVERROR_INVALIDDATA;
+        }
+        if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I &&
+            !s->ps.pps->pps_curr_pic_ref_enabled_flag) {
+            av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // when flag is not present, picture is inferred to be output
+        sh->pic_output_flag = 1;
+        if (s->ps.pps->output_flag_present_flag)
+            sh->pic_output_flag = get_bits1(gb);
+
+        if (s->ps.sps->separate_colour_plane)
+            sh->colour_plane_id = get_bits(gb, 2);
+        /* POC and reference picture sets are coded for non-IDR pictures
+         * only; the else branch sets the IDR values directly. */
+        if (!IS_IDR(s)) {
+            int poc, pos;
+
+            sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
+            poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
+            if (!sh->first_slice_in_pic_flag && poc != s->poc) {
+                av_log(s->avctx, AV_LOG_WARNING,
+                       "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
+                if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                    return AVERROR_INVALIDDATA;
+                poc = s->poc;
+            }
+            s->poc = poc;
+            /* Short-term RPS: either coded in the slice header or selected
+             * by index from the SPS; the number of bits consumed is kept
+             * in short_term_ref_pic_set_size. */
+            sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
+            pos = get_bits_left(gb);
+            if (!sh->short_term_ref_pic_set_sps_flag) {
+                ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
+                if (ret < 0)
+                    return ret;
+
+                sh->short_term_rps = &sh->slice_rps;
+            } else {
+                int numbits, rps_idx;
+
+                if (!s->ps.sps->nb_st_rps) {
+                    av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
+                    return AVERROR_INVALIDDATA;
+                }
+
+                numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
+                rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
+                sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
+            }
+            sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
+            /* A long-term RPS error is fatal only with AV_EF_EXPLODE. */
+            pos = get_bits_left(gb);
+            ret = decode_lt_rps(s, &sh->long_term_rps, gb);
+            if (ret < 0) {
+                av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
+                if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                    return AVERROR_INVALIDDATA;
+            }
+            sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
+
+            if (s->ps.sps->temporal_mvp_enabled)
+                sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
+            else
+                sh->slice_temporal_mvp_enabled_flag = 0;
+        } else {
+            s->poc                              = 0;
+            sh->pic_order_cnt_lsb               = 0;
+            sh->short_term_ref_pic_set_sps_flag = 0;
+            sh->short_term_ref_pic_set_size     = 0;
+            sh->short_term_rps                  = NULL;
+            sh->long_term_ref_pic_set_size      = 0;
+            sh->slice_temporal_mvp_enabled_flag = 0;
+        }
+
+        /* 8.3.1: the first slice of a temporal_id 0 picture whose NAL unit
+         * type is none of those listed below updates pocTid0 */
+        if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
+            s->nal_unit_type != HEVC_NAL_TRAIL_N &&
+            s->nal_unit_type != HEVC_NAL_TSA_N   &&
+            s->nal_unit_type != HEVC_NAL_STSA_N  &&
+            s->nal_unit_type != HEVC_NAL_RADL_N  &&
+            s->nal_unit_type != HEVC_NAL_RADL_R  &&
+            s->nal_unit_type != HEVC_NAL_RASL_N  &&
+            s->nal_unit_type != HEVC_NAL_RASL_R)
+            s->pocTid0 = s->poc;
+        /* One SAO flag is coded for luma and one shared by both chroma
+         * planes. */
+        if (s->ps.sps->sao_enabled) {
+            sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
+            if (s->ps.sps->chroma_format_idc) {
+                sh->slice_sample_adaptive_offset_flag[1] =
+                sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
+            }
+        } else {
+            sh->slice_sample_adaptive_offset_flag[0] = 0;
+            sh->slice_sample_adaptive_offset_flag[1] = 0;
+            sh->slice_sample_adaptive_offset_flag[2] = 0;
+        }
+        /* Reference list sizes: PPS defaults, optionally overridden by the
+         * slice; L1 is only used for B slices. */
+        sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
+        if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
+            int nb_refs;
+
+            sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
+            if (sh->slice_type == HEVC_SLICE_B)
+                sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
+
+            if (get_bits1(gb)) { // num_ref_idx_active_override_flag
+                sh->nb_refs[L0] = get_ue_golomb_31(gb) + 1;
+                if (sh->slice_type == HEVC_SLICE_B)
+                    sh->nb_refs[L1] = get_ue_golomb_31(gb) + 1;
+            }
+            if (sh->nb_refs[L0] >= HEVC_MAX_REFS || sh->nb_refs[L1] >= HEVC_MAX_REFS) {
+                av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
+                       sh->nb_refs[L0], sh->nb_refs[L1]);
+                return AVERROR_INVALIDDATA;
+            }
+
+            sh->rpl_modification_flag[0] = 0;
+            sh->rpl_modification_flag[1] = 0;
+            nb_refs = ff_hevc_frame_nb_refs(s);
+            if (!nb_refs) {
+                av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
+                return AVERROR_INVALIDDATA;
+            }
+            /* Optional explicit list modification; every entry is read
+             * with ceil(log2(nb_refs)) bits. */
+            if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
+                sh->rpl_modification_flag[0] = get_bits1(gb);
+                if (sh->rpl_modification_flag[0]) {
+                    for (i = 0; i < sh->nb_refs[L0]; i++)
+                        sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
+                }
+
+                if (sh->slice_type == HEVC_SLICE_B) {
+                    sh->rpl_modification_flag[1] = get_bits1(gb);
+                    if (sh->rpl_modification_flag[1] == 1)
+                        for (i = 0; i < sh->nb_refs[L1]; i++)
+                            sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
+                }
+            }
+
+            if (sh->slice_type == HEVC_SLICE_B)
+                sh->mvd_l1_zero_flag = get_bits1(gb);
+
+            if (s->ps.pps->cabac_init_present_flag)
+                sh->cabac_init_flag = get_bits1(gb);
+            else
+                sh->cabac_init_flag = 0;
+            /* Collocated reference: for B slices the coded bit is inverted
+             * to obtain the list; the index is only coded when that list
+             * has more than one entry. */
+            sh->collocated_ref_idx = 0;
+            if (sh->slice_temporal_mvp_enabled_flag) {
+                sh->collocated_list = L0;
+                if (sh->slice_type == HEVC_SLICE_B)
+                    sh->collocated_list = !get_bits1(gb);
+
+                if (sh->nb_refs[sh->collocated_list] > 1) {
+                    sh->collocated_ref_idx = get_ue_golomb_long(gb);
+                    if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
+                        av_log(s->avctx, AV_LOG_ERROR,
+                               "Invalid collocated_ref_idx: %d.\n",
+                               sh->collocated_ref_idx);
+                        return AVERROR_INVALIDDATA;
+                    }
+                }
+            }
+
+            if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
+                (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
+                int ret = pred_weight_table(s, gb);
+                if (ret < 0)
+                    return ret;
+            }
+            /* Coded as 5 minus the number of merge candidates. */
+            sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
+            if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "Invalid number of merging MVP candidates: %d.\n",
+                       sh->max_num_merge_cand);
+                return AVERROR_INVALIDDATA;
+            }
+
+            // Syntax in 7.3.6.1
+            if (s->ps.sps->motion_vector_resolution_control_idc == 2)
+                sh->use_integer_mv_flag = get_bits1(gb);
+            else
+                // Inferred to be equal to motion_vector_resolution_control_idc if not present
+                sh->use_integer_mv_flag = s->ps.sps->motion_vector_resolution_control_idc;
+
+        }
+        /* QP delta and the optional slice-level chroma QP offsets, which
+         * must lie in [-12, 12]. */
+        sh->slice_qp_delta = get_se_golomb(gb);
+
+        if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
+            sh->slice_cb_qp_offset = get_se_golomb(gb);
+            sh->slice_cr_qp_offset = get_se_golomb(gb);
+            if (sh->slice_cb_qp_offset < -12 || sh->slice_cb_qp_offset > 12 ||
+                sh->slice_cr_qp_offset < -12 || sh->slice_cr_qp_offset > 12) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid slice cx qp offset.\n");
+                return AVERROR_INVALIDDATA;
+            }
+        } else {
+            sh->slice_cb_qp_offset = 0;
+            sh->slice_cr_qp_offset = 0;
+        }
+
+        if (s->ps.pps->pps_slice_act_qp_offsets_present_flag) {
+            sh->slice_act_y_qp_offset  = get_se_golomb(gb);
+            sh->slice_act_cb_qp_offset = get_se_golomb(gb);
+            sh->slice_act_cr_qp_offset = get_se_golomb(gb);
+        }
+
+        if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
+            sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
+        else
+            sh->cu_chroma_qp_offset_enabled_flag = 0;
+        /* Deblocking parameters: a slice-level override when the PPS allows
+         * it and the slice signals one, the PPS values otherwise, all zero
+         * when the PPS carries no deblocking control. */
+        if (s->ps.pps->deblocking_filter_control_present_flag) {
+            int deblocking_filter_override_flag = 0;
+
+            if (s->ps.pps->deblocking_filter_override_enabled_flag)
+                deblocking_filter_override_flag = get_bits1(gb);
+
+            if (deblocking_filter_override_flag) {
+                sh->disable_deblocking_filter_flag = get_bits1(gb);
+                if (!sh->disable_deblocking_filter_flag) {
+                    int beta_offset_div2 = get_se_golomb(gb);
+                    int tc_offset_div2   = get_se_golomb(gb) ;
+                    if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
+                        tc_offset_div2   < -6 || tc_offset_div2   > 6) {
+                        av_log(s->avctx, AV_LOG_ERROR,
+                            "Invalid deblock filter offsets: %d, %d\n",
+                            beta_offset_div2, tc_offset_div2);
+                        return AVERROR_INVALIDDATA;
+                    }
+                    sh->beta_offset = beta_offset_div2 * 2;
+                    sh->tc_offset   =   tc_offset_div2 * 2;
+                }
+            } else {
+                sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
+                sh->beta_offset                    = s->ps.pps->beta_offset;
+                sh->tc_offset                      = s->ps.pps->tc_offset;
+            }
+        } else {
+            sh->disable_deblocking_filter_flag = 0;
+            sh->beta_offset                    = 0;
+            sh->tc_offset                      = 0;
+        }
+
+        if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
+            (sh->slice_sample_adaptive_offset_flag[0] ||
+             sh->slice_sample_adaptive_offset_flag[1] ||
+             !sh->disable_deblocking_filter_flag)) {
+            sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
+        } else {
+            sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
+        }
+    } else if (!s->slice_initialized) {
+        av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    /* Entry point offsets are coded when tiles or entropy coding sync are
+     * enabled in the PPS. */
+    sh->num_entry_point_offsets = 0;
+    if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
+        unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
+        // It would be possible to bound this tighter but this here is simpler
+        if (num_entry_point_offsets > get_bits_left(gb)) {
+            av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
+            return AVERROR_INVALIDDATA;
+        }
+
+        sh->num_entry_point_offsets = num_entry_point_offsets;
+        if (sh->num_entry_point_offsets > 0) {
+            int offset_len = get_ue_golomb_long(gb) + 1;
+
+            if (offset_len < 1 || offset_len > 32) {
+                sh->num_entry_point_offsets = 0;
+                av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
+                return AVERROR_INVALIDDATA;
+            }
+            /* Replace the arrays left by the previous header; offset and
+             * size get one element more than entry_point_offset. */
+            av_freep(&sh->entry_point_offset);
+            av_freep(&sh->offset);
+            av_freep(&sh->size);
+            sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
+            sh->offset             = av_malloc_array(sh->num_entry_point_offsets + 1, sizeof(int));
+            sh->size               = av_malloc_array(sh->num_entry_point_offsets + 1, sizeof(int));
+            if (!sh->entry_point_offset || !sh->offset || !sh->size) {
+                sh->num_entry_point_offsets = 0;
+                av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
+                return AVERROR(ENOMEM);
+            }
+            for (i = 0; i < sh->num_entry_point_offsets; i++) {
+                unsigned val = get_bits_long(gb, offset_len);
+                sh->entry_point_offset[i] = val + 1; // +1 to get the size
+            }
+            if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
+                s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
+                s->threads_number = 1;
+            } else
+                s->enable_parallel_tiles = 0;
+        } else
+            s->enable_parallel_tiles = 0;
+    }
+    /* The slice header extension is skipped byte by byte. */
+    if (s->ps.pps->slice_header_extension_present_flag) {
+        unsigned int length = get_ue_golomb_long(gb);
+        if (length*8LL > get_bits_left(gb)) {
+            av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
+            return AVERROR_INVALIDDATA;
+        }
+        for (i = 0; i < length; i++)
+            skip_bits(gb, 8);  // slice_header_extension_data_byte
+    }
+    /* The header must end with a set bit; data_offset is the aligned read
+     * position relative to gb->buffer. */
+    ret = get_bits1(gb);
+    if (!ret) {
+        av_log(s->avctx, AV_LOG_ERROR, "alignment_bit_equal_to_one=0\n");
+        return AVERROR_INVALIDDATA;
+    }
+    sh->data_offset = align_get_bits(gb) - gb->buffer;
+
+    // Inferred parameters
+    sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
+    if (sh->slice_qp > 51 ||
+        sh->slice_qp < -s->ps.sps->qp_bd_offset) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "The slice_qp %d is outside the valid range "
+               "[%d, 51].\n",
+               sh->slice_qp,
+               -s->ps.sps->qp_bd_offset);
+        return AVERROR_INVALIDDATA;
+    }
+
+    sh->slice_ctb_addr_rs = sh->slice_segment_addr;
+    /* A dependent slice segment cannot start at CTB address 0. */
+    if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
+        av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (get_bits_left(gb) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Overread slice header by %d bits\n", -get_bits_left(gb));
+        return AVERROR_INVALIDDATA;
+    }
+    /* Per-slice state of the first local context. */
+    s->local_ctx[0].first_qp_group = !s->sh.dependent_slice_segment_flag;
+
+    if (!s->ps.pps->cu_qp_delta_enabled_flag)
+        s->local_ctx[0].qp_y = s->sh.slice_qp;
+
+    s->slice_initialized = 1;
+    s->local_ctx[0].tu.cu_qp_offset_cb = 0;
+    s->local_ctx[0].tu.cu_qp_offset_cr = 0;
+
+    return 0;
+}
+
+/* Element of a per-CTB table at CTB column x, row y (raster order). */
+#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
+
+/*
+ * Set one SAO field of the current CTB: copy it from the left or the upper
+ * CTB when the corresponding merge flag is set, otherwise evaluate value.
+ * value is evaluated only when neither merge flag is set.
+ */
+#define SET_SAO(elem, value)                            \
+do {                                                    \
+    if (merge_left)                                     \
+        sao->elem = CTB(s->sao, rx - 1, ry).elem;       \
+    else if (merge_up)                                  \
+        sao->elem = CTB(s->sao, rx, ry - 1).elem;       \
+    else                                                \
+        sao->elem = value;                              \
+} while (0)
+
+/**
+ * Decode the SAO parameters of the CTB at column rx, row ry into s->sao and
+ * derive the signed, scaled offset values from them.
+ *
+ * @param lc local context used for decoding
+ * @param rx CTB column
+ * @param ry CTB row
+ */
+static void hls_sao_param(HEVCLocalContext *lc, int rx, int ry)
+{
+    const HEVCContext *const s = lc->parent;
+    SAOParams *sao      = &CTB(s->sao, rx, ry);
+    const int nb_planes = s->ps.sps->chroma_format_idc ? 3 : 1;
+    int merge_left      = 0;
+    int merge_up        = 0;
+    int plane, k;
+
+    /* the merge flags are decoded only if SAO is on for luma or chroma and
+     * the respective neighbour is flagged as available; "up" is tried only
+     * when "left" was not chosen */
+    if (s->sh.slice_sample_adaptive_offset_flag[0] ||
+        s->sh.slice_sample_adaptive_offset_flag[1]) {
+        if (rx > 0 && lc->ctb_left_flag)
+            merge_left = ff_hevc_sao_merge_flag_decode(lc);
+        if (ry > 0 && !merge_left && lc->ctb_up_flag)
+            merge_up = ff_hevc_sao_merge_flag_decode(lc);
+    }
+
+    for (plane = 0; plane < nb_planes; plane++) {
+        const int scale_shift = plane ? s->ps.pps->log2_sao_offset_scale_chroma
+                                      : s->ps.pps->log2_sao_offset_scale_luma;
+
+        if (!s->sh.slice_sample_adaptive_offset_flag[plane]) {
+            sao->type_idx[plane] = SAO_NOT_APPLIED;
+            continue;
+        }
+
+        /* the third plane reuses type and edge class of the second one */
+        if (plane == 2) {
+            sao->type_idx[2] = sao->type_idx[1];
+            sao->eo_class[2] = sao->eo_class[1];
+        } else {
+            SET_SAO(type_idx[plane], ff_hevc_sao_type_idx_decode(lc));
+        }
+
+        if (sao->type_idx[plane] == SAO_NOT_APPLIED)
+            continue;
+
+        for (k = 0; k < 4; k++)
+            SET_SAO(offset_abs[plane][k], ff_hevc_sao_offset_abs_decode(lc));
+
+        if (sao->type_idx[plane] == SAO_BAND) {
+            /* a sign is only present for non-zero offsets */
+            for (k = 0; k < 4; k++) {
+                if (!sao->offset_abs[plane][k]) {
+                    sao->offset_sign[plane][k] = 0;
+                } else {
+                    SET_SAO(offset_sign[plane][k],
+                            ff_hevc_sao_offset_sign_decode(lc));
+                }
+            }
+            SET_SAO(band_position[plane], ff_hevc_sao_band_position_decode(lc));
+        } else if (plane != 2) {
+            SET_SAO(eo_class[plane], ff_hevc_sao_eo_class_decode(lc));
+        }
+
+        // Inferred parameters
+        sao->offset_val[plane][0] = 0;
+        for (k = 0; k < 4; k++) {
+            int negate;
+
+            /* edge offsets: the last two are negative; otherwise the sign
+             * comes from offset_sign */
+            if (sao->type_idx[plane] == SAO_EDGE)
+                negate = k > 1;
+            else
+                negate = sao->offset_sign[plane][k] != 0;
+
+            sao->offset_val[plane][k + 1] = sao->offset_abs[plane][k];
+            if (negate)
+                sao->offset_val[plane][k + 1] = -sao->offset_val[plane][k + 1];
+            sao->offset_val[plane][k + 1] *= 1 << scale_shift;
+        }
+    }
+}
+
+#undef SET_SAO
+#undef CTB
+
+/**
+ * Decode the cross-component prediction scale for chroma component idx and
+ * store it in lc->tu.res_scale_val.
+ *
+ * The stored value is 0 when the decoded log2_res_scale_abs_plus1 is 0,
+ * otherwise (1 << (log2_res_scale_abs_plus1 - 1)) * (1 - 2 * sign_flag).
+ *
+ * @return always 0
+ */
+static int hls_cross_component_pred(HEVCLocalContext *lc, int idx)
+{
+    const int log2_abs_plus1 = ff_hevc_log2_res_scale_abs(lc, idx);
+    int scale = 0;
+
+    /* the sign flag is only decoded for a non-zero magnitude */
+    if (log2_abs_plus1) {
+        const int sign_flag = ff_hevc_res_scale_sign_flag(lc, idx);
+        const int magnitude = 1 << (log2_abs_plus1 - 1);
+
+        scale = magnitude * (1 - 2 * sign_flag);
+    }
+
+    lc->tu.res_scale_val = scale;
+
+    return 0;
+}
+
+static int hls_transform_unit(HEVCLocalContext *lc, int x0, int y0,
+                              int xBase, int yBase, int cb_xBase, int cb_yBase,
+                              int log2_cb_size, int log2_trafo_size,
+                              int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
+{
+    const HEVCContext *const s = lc->parent;
+    const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
+    int i;
+
+    if (lc->cu.pred_mode == MODE_INTRA) {
+        int trafo_size = 1 << log2_trafo_size;
+        ff_hevc_set_neighbour_available(lc, x0, y0, trafo_size, trafo_size);
+
+        s->hpc.intra_pred[log2_trafo_size - 2](lc, x0, y0, 0);
+    }
+
+    if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
+        (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
+        int scan_idx   = SCAN_DIAG;
+        int scan_idx_c = SCAN_DIAG;
+        int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
+                         (s->ps.sps->chroma_format_idc == 2 &&
+                         (cbf_cb[1] || cbf_cr[1]));
+
+        if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
+            lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(lc);
+            if (lc->tu.cu_qp_delta != 0)
+                if (ff_hevc_cu_qp_delta_sign_flag(lc) == 1)
+                    lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
+            lc->tu.is_cu_qp_delta_coded = 1;
+
+            if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
+                lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "The cu_qp_delta %d is outside the valid range "
+                       "[%d, %d].\n",
+                       lc->tu.cu_qp_delta,
+                       -(26 + s->ps.sps->qp_bd_offset / 2),
+                        (25 + s->ps.sps->qp_bd_offset / 2));
+                return AVERROR_INVALIDDATA;
+            }
+
+            ff_hevc_set_qPy(lc, cb_xBase, cb_yBase, log2_cb_size);
+        }
+
+        if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
+            !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
+            int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(lc);
+            if (cu_chroma_qp_offset_flag) {
+                int cu_chroma_qp_offset_idx  = 0;
+                if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
+                    cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(lc);
+                    av_log(s->avctx, AV_LOG_ERROR,
+                        "cu_chroma_qp_offset_idx not yet tested.\n");
+                }
+                lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
+                lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
+            } else {
+                lc->tu.cu_qp_offset_cb = 0;
+                lc->tu.cu_qp_offset_cr = 0;
+            }
+            lc->tu.is_cu_chroma_qp_offset_coded = 1;
+        }
+
+        if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
+            if (lc->tu.intra_pred_mode >= 6 &&
+                lc->tu.intra_pred_mode <= 14) {
+                scan_idx = SCAN_VERT;
+            } else if (lc->tu.intra_pred_mode >= 22 &&
+                       lc->tu.intra_pred_mode <= 30) {
+                scan_idx = SCAN_HORIZ;
+            }
+
+            if (lc->tu.intra_pred_mode_c >=  6 &&
+                lc->tu.intra_pred_mode_c <= 14) {
+                scan_idx_c = SCAN_VERT;
+            } else if (lc->tu.intra_pred_mode_c >= 22 &&
+                       lc->tu.intra_pred_mode_c <= 30) {
+                scan_idx_c = SCAN_HORIZ;
+            }
+        }
+
+        lc->tu.cross_pf = 0;
+
+        if (cbf_luma)
+            ff_hevc_hls_residual_coding(lc, x0, y0, log2_trafo_size, scan_idx, 0);
+        if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
+            int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
+            int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
+            lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
+                                (lc->cu.pred_mode == MODE_INTER ||
+                                 (lc->tu.chroma_mode_c ==  4)));
+
+            if (lc->tu.cross_pf) {
+                hls_cross_component_pred(lc, 0);
+            }
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(lc, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size_c - 2](lc, x0, y0 + (i << log2_trafo_size_c), 1);
+                }
+                if (cbf_cb[i])
+                    ff_hevc_hls_residual_coding(lc, x0, y0 + (i << log2_trafo_size_c),
+                                                log2_trafo_size_c, scan_idx_c, 1);
+                else
+                    if (lc->tu.cross_pf) {
+                        ptrdiff_t stride = s->cur_frame->f->linesize[1];
+                        int hshift = s->ps.sps->hshift[1];
+                        int vshift = s->ps.sps->vshift[1];
+                        const int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
+                        int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
+                        int size = 1 << log2_trafo_size_c;
+
+                        uint8_t *dst = &s->cur_frame->f->data[1][(y0 >> vshift) * stride +
+                                                              ((x0 >> hshift) << s->ps.sps->pixel_shift)];
+                        for (i = 0; i < (size * size); i++) {
+                            coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
+                        }
+                        s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
+                    }
+            }
+
+            if (lc->tu.cross_pf) {
+                hls_cross_component_pred(lc, 1);
+            }
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(lc, x0, y0 + (i << log2_trafo_size_c),
+                                                    trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size_c - 2](lc, x0, y0 + (i << log2_trafo_size_c), 2);
+                }
+                if (cbf_cr[i])
+                    ff_hevc_hls_residual_coding(lc, x0, y0 + (i << log2_trafo_size_c),
+                                                log2_trafo_size_c, scan_idx_c, 2);
+                else
+                    if (lc->tu.cross_pf) {
+                        ptrdiff_t stride = s->cur_frame->f->linesize[2];
+                        int hshift = s->ps.sps->hshift[2];
+                        int vshift = s->ps.sps->vshift[2];
+                        const int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
+                        int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
+                        int size = 1 << log2_trafo_size_c;
+
+                        uint8_t *dst = &s->cur_frame->f->data[2][(y0 >> vshift) * stride +
+                                                          ((x0 >> hshift) << s->ps.sps->pixel_shift)];
+                        for (i = 0; i < (size * size); i++) {
+                            coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
+                        }
+                        s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
+                    }
+            }
+        } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
+            int trafo_size_h = 1 << (log2_trafo_size + 1);
+            int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(lc, xBase, yBase + (i << log2_trafo_size),
+                                                    trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size - 2](lc, xBase, yBase + (i << log2_trafo_size), 1);
+                }
+                if (cbf_cb[i])
+                    ff_hevc_hls_residual_coding(lc, xBase, yBase + (i << log2_trafo_size),
+                                                log2_trafo_size, scan_idx_c, 1);
+            }
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(lc, xBase, yBase + (i << log2_trafo_size),
+                                                trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size - 2](lc, xBase, yBase + (i << log2_trafo_size), 2);
+                }
+                if (cbf_cr[i])
+                    ff_hevc_hls_residual_coding(lc, xBase, yBase + (i << log2_trafo_size),
+                                                log2_trafo_size, scan_idx_c, 2);
+            }
+        }
+    } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
+        if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
+            int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
+            int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
+            ff_hevc_set_neighbour_available(lc, x0, y0, trafo_size_h, trafo_size_v);
+            s->hpc.intra_pred[log2_trafo_size_c - 2](lc, x0, y0, 1);
+            s->hpc.intra_pred[log2_trafo_size_c - 2](lc, x0, y0, 2);
+            if (s->ps.sps->chroma_format_idc == 2) {
+                ff_hevc_set_neighbour_available(lc, x0, y0 + (1 << log2_trafo_size_c),
+                                                trafo_size_h, trafo_size_v);
+                s->hpc.intra_pred[log2_trafo_size_c - 2](lc, x0, y0 + (1 << log2_trafo_size_c), 1);
+                s->hpc.intra_pred[log2_trafo_size_c - 2](lc, x0, y0 + (1 << log2_trafo_size_c), 2);
+            }
+        } else if (blk_idx == 3) {
+            int trafo_size_h = 1 << (log2_trafo_size + 1);
+            int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
+            ff_hevc_set_neighbour_available(lc, xBase, yBase,
+                                            trafo_size_h, trafo_size_v);
+            s->hpc.intra_pred[log2_trafo_size - 2](lc, xBase, yBase, 1);
+            s->hpc.intra_pred[log2_trafo_size - 2](lc, xBase, yBase, 2);
+            if (s->ps.sps->chroma_format_idc == 2) {
+                ff_hevc_set_neighbour_available(lc, xBase, yBase + (1 << log2_trafo_size),
+                                                trafo_size_h, trafo_size_v);
+                s->hpc.intra_pred[log2_trafo_size - 2](lc, xBase, yBase + (1 << log2_trafo_size), 1);
+                s->hpc.intra_pred[log2_trafo_size - 2](lc, xBase, yBase + (1 << log2_trafo_size), 2);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Store the value 2 in s->is_pcm for every minimum-size prediction unit
+ * covered by the given block, with the block clipped to the picture size.
+ *
+ * @param s            decoder context providing the SPS and the is_pcm map
+ * @param x0           horizontal position of the block
+ * @param y0           vertical position of the block
+ * @param log2_cb_size log2 of the block size
+ */
+static void set_deblocking_bypass(const HEVCContext *s, int x0, int y0, int log2_cb_size)
+{
+    const int size     = 1 << log2_cb_size;
+    const int pu_shift = s->ps.sps->log2_min_pu_size;
+    const int pu_pitch = s->ps.sps->min_pu_width;
+    /* Right/bottom limits of the block, clipped to the picture. */
+    const int x_limit  = FFMIN(x0 + size, s->ps.sps->width);
+    const int y_limit  = FFMIN(y0 + size, s->ps.sps->height);
+    const int col_end  = x_limit >> pu_shift;
+    const int row_end  = y_limit >> pu_shift;
+    int row, col;
+
+    for (row = y0 >> pu_shift; row < row_end; row++) {
+        for (col = x0 >> pu_shift; col < col_end; col++) {
+            s->is_pcm[col + row * pu_pitch] = 2;
+        }
+    }
+}
+
+/**
+ * Parse one node of the transform tree: either recurse into its four
+ * quadrants or, for a leaf, decode the transform unit it contains.
+ *
+ * @param lc              local decoding context; lc->parent supplies the decoder context
+ * @param x0              horizontal position of this node
+ * @param y0              vertical position of this node
+ * @param xBase           horizontal position of the parent node (children receive the
+ *                        parent's x0 here); forwarded to hls_transform_unit()
+ * @param yBase           vertical counterpart of xBase
+ * @param cb_xBase        forwarded unchanged to children and to hls_transform_unit()
+ * @param cb_yBase        forwarded unchanged to children and to hls_transform_unit()
+ * @param log2_cb_size    forwarded unchanged to children and to hls_transform_unit()
+ * @param log2_trafo_size log2 of the size of this node; children get log2_trafo_size - 1
+ * @param trafo_depth     depth of this node in the tree; children get trafo_depth + 1
+ * @param blk_idx         index (0-3) of this node among its siblings
+ * @param base_cbf_cb     two Cb coded-block flags inherited from the parent node
+ * @param base_cbf_cr     two Cr coded-block flags inherited from the parent node
+ * @return 0 on success, otherwise the negative value returned by a child node
+ *         or by hls_transform_unit()
+ */
+static int hls_transform_tree(HEVCLocalContext *lc, int x0, int y0,
+                              int xBase, int yBase, int cb_xBase, int cb_yBase,
+                              int log2_cb_size, int log2_trafo_size,
+                              int trafo_depth, int blk_idx,
+                              const int *base_cbf_cb, const int *base_cbf_cr)
+{
+    const HEVCContext *const s = lc->parent;
+    uint8_t split_transform_flag;
+    int cbf_cb[2];
+    int cbf_cr[2];
+    int ret;
+
+    /* Work on local copies: flags decoded below are handed on to the
+     * children, the caller's arrays are never written. */
+    cbf_cb[0] = base_cbf_cb[0];
+    cbf_cb[1] = base_cbf_cb[1];
+    cbf_cr[0] = base_cbf_cr[0];
+    cbf_cr[1] = base_cbf_cr[1];
+
+    /* Select the intra prediction modes used by this TU. With
+     * intra_split_flag set they are picked per block at depth 1 only; the
+     * chroma modes are per block only when chroma_format_idc == 3, otherwise
+     * entry 0 is used. */
+    if (lc->cu.intra_split_flag) {
+        if (trafo_depth == 1) {
+            lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
+            if (s->ps.sps->chroma_format_idc == 3) {
+                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
+                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
+            } else {
+                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
+                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
+            }
+        }
+    } else {
+        lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
+        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
+        lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
+    }
+
+    /* split_transform_flag is decoded only when all four conditions hold;
+     * otherwise it is derived from the size limit, the intra split at depth 0
+     * and the inter_split condition. */
+    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
+        log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
+        trafo_depth     < lc->cu.max_trafo_depth       &&
+        !(lc->cu.intra_split_flag && trafo_depth == 0)) {
+        split_transform_flag = ff_hevc_split_transform_flag_decode(lc, log2_trafo_size);
+    } else {
+        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
+                          lc->cu.pred_mode == MODE_INTER &&
+                          lc->cu.part_mode != PART_2Nx2N &&
+                          trafo_depth == 0;
+
+        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
+                               (lc->cu.intra_split_flag && trafo_depth == 0) ||
+                               inter_split;
+    }
+
+    /* Chroma cbf flags are decoded at depth 0 or when the inherited flag is
+     * set; otherwise the inherited value copied above is kept. For
+     * chroma_format_idc == 2 a second flag per component is decoded when the
+     * node is not split or log2_trafo_size == 3. */
+    if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
+        if (trafo_depth == 0 || cbf_cb[0]) {
+            cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(lc, trafo_depth);
+            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
+                cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(lc, trafo_depth);
+            }
+        }
+
+        if (trafo_depth == 0 || cbf_cr[0]) {
+            cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(lc, trafo_depth);
+            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
+                cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(lc, trafo_depth);
+            }
+        }
+    }
+
+    if (split_transform_flag) {
+        const int trafo_size_split = 1 << (log2_trafo_size - 1);
+        const int x1 = x0 + trafo_size_split;
+        const int y1 = y0 + trafo_size_split;
+
+        /* Recurse into one quadrant; this node's position becomes the child's
+         * xBase/yBase. Returns from the enclosing function on error. */
+#define SUBDIVIDE(x, y, idx)                                                    \
+do {                                                                            \
+    ret = hls_transform_tree(lc, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size,\
+                             log2_trafo_size - 1, trafo_depth + 1, idx,         \
+                             cbf_cb, cbf_cr);                                   \
+    if (ret < 0)                                                                \
+        return ret;                                                             \
+} while (0)
+
+        SUBDIVIDE(x0, y0, 0);
+        SUBDIVIDE(x1, y0, 1);
+        SUBDIVIDE(x0, y1, 2);
+        SUBDIVIDE(x1, y1, 3);
+
+#undef SUBDIVIDE
+    } else {
+        int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
+        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
+        int min_tu_width     = s->ps.sps->min_tb_width;
+        int cbf_luma         = 1;
+
+        /* cbf_luma keeps its default of 1 only for an inter CU at depth 0
+         * with no chroma cbf set; in every other case it is decoded. */
+        if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
+            cbf_cb[0] || cbf_cr[0] ||
+            (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
+            cbf_luma = ff_hevc_cbf_luma_decode(lc, trafo_depth);
+        }
+
+        ret = hls_transform_unit(lc, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
+                                 log2_cb_size, log2_trafo_size,
+                                 blk_idx, cbf_luma, cbf_cb, cbf_cr);
+        if (ret < 0)
+            return ret;
+        // TODO: store cbf_luma somewhere else
+        if (cbf_luma) {
+            int i, j;
+            /* Flag every minimum-size transform block covered by this TU. */
+            for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
+                for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
+                    int x_tu = (x0 + j) >> log2_min_tu_size;
+                    int y_tu = (y0 + i) >> log2_min_tu_size;
+                    s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
+                }
+        }
+        /* Boundary strengths are computed unless
+         * s->sh.disable_deblocking_filter_flag is set; a TU of a CU coded
+         * with transquant bypass is additionally marked in s->is_pcm. */
+        if (!s->sh.disable_deblocking_filter_flag) {
+            ff_hevc_deblocking_boundary_strengths(lc, x0, y0, log2_trafo_size);
+            if (s->ps.pps->transquant_bypass_enable_flag &&
+                lc->cu.cu_transquant_bypass_flag)
+                set_deblocking_bypass(s, x0, y0, log2_trafo_size);
+        }
+    }
+    return 0;
+}
+
+/**
+ * Decode the PCM samples of one coding block straight into the current frame.
+ *
+ * The payload is skipped in the CABAC byte stream (lc->cc) and read back
+ * through a separate GetBitContext. Chroma samples are only counted and read
+ * when chroma_format_idc is non-zero, so that a stream without chroma does
+ * not skip bytes beyond the luma samples.
+ *
+ * @param lc           local decoding context; lc->parent supplies the decoder context
+ * @param x0           horizontal position of the coding block in the luma plane
+ * @param y0           vertical position of the coding block in the luma plane
+ * @param log2_cb_size log2 of the coding block size
+ * @return 0 on success, the negative value returned by init_get_bits() on failure
+ */
+static int hls_pcm_sample(HEVCLocalContext *lc, int x0, int y0, int log2_cb_size)
+{
+    const HEVCContext *const s = lc->parent;
+    GetBitContext gb;
+    const int cb_size    = 1 << log2_cb_size;
+    const int has_chroma = s->ps.sps->chroma_format_idc != 0;
+    ptrdiff_t stride0 = s->cur_frame->f->linesize[0];
+    uint8_t *dst0 = &s->cur_frame->f->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
+    /* Size of the PCM payload in bits; luma is always present. */
+    int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth;
+    const uint8_t *pcm;
+    int ret;
+
+    /* Chroma samples are part of the payload only when chroma exists. */
+    if (has_chroma)
+        length += (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
+                   ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
+                  s->ps.sps->pcm.bit_depth_chroma;
+
+    pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
+
+    if (!s->sh.disable_deblocking_filter_flag)
+        ff_hevc_deblocking_boundary_strengths(lc, x0, y0, log2_cb_size);
+
+    ret = init_get_bits(&gb, pcm, length);
+    if (ret < 0)
+        return ret;
+
+    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
+    if (has_chroma) {
+        /* The chroma planes are only addressed when they are actually used. */
+        ptrdiff_t stride1 = s->cur_frame->f->linesize[1];
+        ptrdiff_t stride2 = s->cur_frame->f->linesize[2];
+        uint8_t *dst1 = &s->cur_frame->f->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 +
+                                                  ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
+        uint8_t *dst2 = &s->cur_frame->f->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 +
+                                                  ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
+
+        s->hevcdsp.put_pcm(dst1, stride1,
+                           cb_size >> s->ps.sps->hshift[1],
+                           cb_size >> s->ps.sps->vshift[1],
+                           &gb, s->ps.sps->pcm.bit_depth_chroma);
+        s->hevcdsp.put_pcm(dst2, stride2,
+                           cb_size >> s->ps.sps->hshift[2],
+                           cb_size >> s->ps.sps->vshift[2],
+                           &gb, s->ps.sps->pcm.bit_depth_chroma);
+    }
+
+    return 0;
+}
+
+/**
+ * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
+ *
+ * The reference block is first copied through emulated_edge_mc() into
+ * lc->edge_emu_buffer when it comes too close to the picture border or when
+ * the reference is the frame currently being decoded.
+ *
+ * @param lc local decoding context (lc->parent is the HEVC decoding context)
+ * @param dst target buffer for block data at block position
+ * @param dststride stride of the dst buffer
+ * @param ref reference picture buffer at origin (0, 0)
+ * @param mv motion vector (relative to block position) to get pixel data from
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param luma_weight weighting factor applied to the luma prediction
+ * @param luma_offset additive offset applied to the luma prediction value
+ */
+static void luma_mc_uni(HEVCLocalContext *lc, uint8_t *dst, ptrdiff_t dststride,
+                        const AVFrame *ref, const Mv *mv, int x_off, int y_off,
+                        int block_w, int block_h, int luma_weight, int luma_offset)
+{
+    const HEVCContext *const s = lc->parent;
+    const int px_shift     = s->ps.sps->pixel_shift;
+    const int pic_w        = s->ps.sps->width;
+    const int pic_h        = s->ps.sps->height;
+    /* Quarter-sample phase of the motion vector. */
+    const int frac_x       = mv->x & 3;
+    const int frac_y       = mv->y & 3;
+    const int size_idx     = hevc_pel_weight[block_w];
+    const int use_weights  = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
+                             (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
+    const uint8_t *src     = ref->data[0];
+    ptrdiff_t src_stride   = ref->linesize[0];
+    int needs_emu;
+
+    /* Move to the integer sample position addressed by the motion vector. */
+    x_off += mv->x >> 2;
+    y_off += mv->y >> 2;
+    src   += y_off * src_stride + (x_off * (1 << px_shift));
+
+    needs_emu = x_off < QPEL_EXTRA_BEFORE ||
+                y_off < QPEL_EXTRA_AFTER  ||
+                x_off >= pic_w - block_w - QPEL_EXTRA_AFTER ||
+                y_off >= pic_h - block_h - QPEL_EXTRA_AFTER ||
+                ref == s->cur_frame->f;
+
+    if (needs_emu) {
+        const ptrdiff_t emu_stride = EDGE_EMU_BUFFER_STRIDE << px_shift;
+        int src_back = QPEL_EXTRA_BEFORE * src_stride + (QPEL_EXTRA_BEFORE << px_shift);
+        int buf_fwd  = QPEL_EXTRA_BEFORE * emu_stride + (QPEL_EXTRA_BEFORE << px_shift);
+
+        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - src_back,
+                                 emu_stride, src_stride,
+                                 block_w + QPEL_EXTRA,
+                                 block_h + QPEL_EXTRA,
+                                 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
+                                 pic_w, pic_h);
+        /* Continue from the copy made in the emulation buffer. */
+        src_stride = emu_stride;
+        src        = lc->edge_emu_buffer + buf_fwd;
+    }
+
+    if (use_weights) {
+        s->hevcdsp.put_hevc_qpel_uni_w[size_idx][!!frac_y][!!frac_x](dst, dststride, src, src_stride,
+                                                                     block_h, s->sh.luma_log2_weight_denom,
+                                                                     luma_weight, luma_offset,
+                                                                     frac_x, frac_y, block_w);
+    } else {
+        s->hevcdsp.put_hevc_qpel_uni[size_idx][!!frac_y][!!frac_x](dst, dststride, src, src_stride,
+                                                                   block_h, frac_x, frac_y, block_w);
+    }
+}
+
+/**
+ * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
+ *
+ * The block from ref0 is interpolated into lc->tmp and then combined with
+ * the block from ref1 while writing to dst. Each reference block goes through
+ * emulated_edge_mc() first when it comes too close to the picture border.
+ *
+ * @param lc local decoding context (lc->parent is the HEVC decoding context)
+ * @param dst target buffer for block data at block position
+ * @param dststride stride of the dst buffer
+ * @param ref0 reference picture0 buffer at origin (0, 0)
+ * @param mv0 motion vector0 (relative to block position) to get pixel data from
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param ref1 reference picture1 buffer at origin (0, 0)
+ * @param mv1 motion vector1 (relative to block position) to get pixel data from
+ * @param current_mv current motion vector structure
+ */
+static void luma_mc_bi(HEVCLocalContext *lc, uint8_t *dst, ptrdiff_t dststride,
+                       const AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
+                       int block_w, int block_h, const AVFrame *ref1,
+                       const Mv *mv1, struct MvField *current_mv)
+{
+    const HEVCContext *const s = lc->parent;
+    const AVFrame *const refs[2]    = { ref0, ref1 };
+    const Mv      *const mvs[2]     = { mv0,  mv1  };
+    uint8_t       *const emu_buf[2] = { lc->edge_emu_buffer, lc->edge_emu_buffer2 };
+    const int px_shift    = s->ps.sps->pixel_shift;
+    const int pic_w       = s->ps.sps->width;
+    const int pic_h       = s->ps.sps->height;
+    const int use_weights = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
+                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
+    const int size_idx    = hevc_pel_weight[block_w];
+    const uint8_t *src[2];
+    ptrdiff_t      stride[2];
+    int            frac_x[2], frac_y[2];
+    int            i;
+
+    /* Locate both reference blocks, padding each one when necessary. */
+    for (i = 0; i < 2; i++) {
+        const int xi = x_off + (mvs[i]->x >> 2);
+        const int yi = y_off + (mvs[i]->y >> 2);
+
+        frac_x[i] = mvs[i]->x & 3;
+        frac_y[i] = mvs[i]->y & 3;
+        stride[i] = refs[i]->linesize[0];
+        src[i]    = refs[i]->data[0] + yi * stride[i] + (int)((unsigned)xi << px_shift);
+
+        if (xi < QPEL_EXTRA_BEFORE || yi < QPEL_EXTRA_AFTER ||
+            xi >= pic_w - block_w - QPEL_EXTRA_AFTER ||
+            yi >= pic_h - block_h - QPEL_EXTRA_AFTER) {
+            const ptrdiff_t emu_stride = EDGE_EMU_BUFFER_STRIDE << px_shift;
+            int src_back = QPEL_EXTRA_BEFORE * stride[i]  + (QPEL_EXTRA_BEFORE << px_shift);
+            int buf_fwd  = QPEL_EXTRA_BEFORE * emu_stride + (QPEL_EXTRA_BEFORE << px_shift);
+
+            s->vdsp.emulated_edge_mc(emu_buf[i], src[i] - src_back,
+                                     emu_stride, stride[i],
+                                     block_w + QPEL_EXTRA,
+                                     block_h + QPEL_EXTRA,
+                                     xi - QPEL_EXTRA_BEFORE, yi - QPEL_EXTRA_BEFORE,
+                                     pic_w, pic_h);
+            src[i]    = emu_buf[i] + buf_fwd;
+            stride[i] = emu_stride;
+        }
+    }
+
+    /* First reference goes to the intermediate buffer ... */
+    s->hevcdsp.put_hevc_qpel[size_idx][!!frac_y[0]][!!frac_x[0]](lc->tmp, src[0], stride[0],
+                                                                 block_h, frac_x[0], frac_y[0], block_w);
+
+    /* ... and is combined with the second one while writing to dst. */
+    if (use_weights) {
+        const int idx0 = current_mv->ref_idx[0];
+        const int idx1 = current_mv->ref_idx[1];
+
+        s->hevcdsp.put_hevc_qpel_bi_w[size_idx][!!frac_y[1]][!!frac_x[1]](dst, dststride, src[1], stride[1], lc->tmp,
+                                                                          block_h, s->sh.luma_log2_weight_denom,
+                                                                          s->sh.luma_weight_l0[idx0],
+                                                                          s->sh.luma_weight_l1[idx1],
+                                                                          s->sh.luma_offset_l0[idx0],
+                                                                          s->sh.luma_offset_l1[idx1],
+                                                                          frac_x[1], frac_y[1], block_w);
+    } else {
+        s->hevcdsp.put_hevc_qpel_bi[size_idx][!!frac_y[1]][!!frac_x[1]](dst, dststride, src[1], stride[1], lc->tmp,
+                                                                        block_h, frac_x[1], frac_y[1], block_w);
+    }
+}
+
+/**
+ * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
+ *
+ * The reference block is first copied through emulated_edge_mc() into
+ * lc->edge_emu_buffer when it comes too close to the picture border or when
+ * src0 is one of the chroma planes of the frame currently being decoded.
+ *
+ * @param lc local decoding context (lc->parent is the HEVC decoding context)
+ * @param dst0 target buffer for block data at block position
+ * @param dststride stride of the dst0 buffer
+ * @param src0 reference plane buffer at origin (0, 0)
+ * @param srcstride stride of the src0 buffer
+ * @param reflist index into current_mv->mv selecting the motion vector to use
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param current_mv current motion vector structure
+ * @param chroma_weight weighting factor applied to the chroma prediction
+ * @param chroma_offset additive offset applied to the chroma prediction value
+ */
+static void chroma_mc_uni(HEVCLocalContext *lc, uint8_t *dst0,
+                          ptrdiff_t dststride, const uint8_t *src0, ptrdiff_t srcstride, int reflist,
+                          int x_off, int y_off, int block_w, int block_h,
+                          const struct MvField *current_mv, int chroma_weight, int chroma_offset)
+{
+    const HEVCContext *const s = lc->parent;
+    const Mv *const mv     = &current_mv->mv[reflist];
+    const int px_shift     = s->ps.sps->pixel_shift;
+    const int hshift       = s->ps.sps->hshift[1];
+    const int vshift       = s->ps.sps->vshift[1];
+    const int pic_w        = s->ps.sps->width  >> hshift;
+    const int pic_h        = s->ps.sps->height >> vshift;
+    const int use_weights  = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
+                             (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
+    const int size_idx     = hevc_pel_weight[block_w];
+    /* Sub-sample phase of the motion vector in chroma units ... */
+    const intptr_t frac_x  = av_mod_uintp2(mv->x, 2 + hshift);
+    const intptr_t frac_y  = av_mod_uintp2(mv->y, 2 + vshift);
+    /* ... and the scaled value handed to the DSP functions. */
+    const intptr_t filt_x  = frac_x << (1 - hshift);
+    const intptr_t filt_y  = frac_y << (1 - vshift);
+    /* Must be evaluated before src0 is advanced below. */
+    const int ref_is_cur   = src0 == s->cur_frame->f->data[1] || src0 == s->cur_frame->f->data[2];
+    int needs_emu;
+
+    x_off += mv->x >> (2 + hshift);
+    y_off += mv->y >> (2 + vshift);
+    src0  += y_off * srcstride + (x_off * (1 << px_shift));
+
+    needs_emu = x_off < EPEL_EXTRA_BEFORE ||
+                y_off < EPEL_EXTRA_AFTER  ||
+                x_off >= pic_w - block_w - EPEL_EXTRA_AFTER ||
+                y_off >= pic_h - block_h - EPEL_EXTRA_AFTER ||
+                ref_is_cur;
+
+    if (needs_emu) {
+        const int emu_stride = EDGE_EMU_BUFFER_STRIDE << px_shift;
+        int src_back = EPEL_EXTRA_BEFORE * (srcstride + (1 << px_shift));
+        int buf_fwd  = EPEL_EXTRA_BEFORE * (emu_stride + (1 << px_shift));
+
+        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - src_back,
+                                 emu_stride, srcstride,
+                                 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
+                                 x_off - EPEL_EXTRA_BEFORE,
+                                 y_off - EPEL_EXTRA_BEFORE,
+                                 pic_w, pic_h);
+
+        /* Continue from the copy made in the emulation buffer. */
+        srcstride = emu_stride;
+        src0      = lc->edge_emu_buffer + buf_fwd;
+    }
+
+    if (use_weights) {
+        s->hevcdsp.put_hevc_epel_uni_w[size_idx][!!frac_y][!!frac_x](dst0, dststride, src0, srcstride,
+                                                                     block_h, s->sh.chroma_log2_weight_denom,
+                                                                     chroma_weight, chroma_offset,
+                                                                     filt_x, filt_y, block_w);
+    } else {
+        s->hevcdsp.put_hevc_epel_uni[size_idx][!!frac_y][!!frac_x](dst0, dststride, src0, srcstride,
+                                                                   block_h, filt_x, filt_y, block_w);
+    }
+}
+
+/**
+ * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
+ *
+ * The block from ref0 is interpolated into lc->tmp and then combined with
+ * the block from ref1 while writing to dst0. Each reference block goes
+ * through emulated_edge_mc() first when it comes too close to the picture
+ * border.
+ *
+ * @param lc local decoding context (lc->parent is the HEVC decoding context)
+ * @param dst0 target buffer for block data at block position
+ * @param dststride stride of the dst0 buffer; not referenced by this function,
+ *                  which uses linesize[cidx + 1] of the current frame instead
+ * @param ref0 reference picture0 buffer at origin (0, 0)
+ * @param ref1 reference picture1 buffer at origin (0, 0)
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param current_mv current motion vector structure; mv[0] is applied to ref0
+ *                   and mv[1] to ref1
+ * @param cidx chroma component(cb, cr)
+ */
+static void chroma_mc_bi(HEVCLocalContext *lc, uint8_t *dst0, ptrdiff_t dststride,
+                         const AVFrame *ref0, const AVFrame *ref1,
+                         int x_off, int y_off, int block_w, int block_h, const MvField *current_mv, int cidx)
+{
+    const HEVCContext *const s = lc->parent;
+    const AVFrame *const refs[2]    = { ref0, ref1 };
+    uint8_t       *const emu_buf[2] = { lc->edge_emu_buffer, lc->edge_emu_buffer2 };
+    const int plane       = cidx + 1;
+    const int px_shift    = s->ps.sps->pixel_shift;
+    const int hshift      = s->ps.sps->hshift[1];
+    const int vshift      = s->ps.sps->vshift[1];
+    const int pic_w       = s->ps.sps->width  >> hshift;
+    const int pic_h       = s->ps.sps->height >> vshift;
+    const int use_weights = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
+                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
+    const int size_idx    = hevc_pel_weight[block_w];
+    const uint8_t *src[2];
+    ptrdiff_t      stride[2];
+    intptr_t       frac_x[2], frac_y[2];   /* sub-sample phase in chroma units */
+    intptr_t       filt_x[2], filt_y[2];   /* scaled phase handed to the DSP   */
+    int            i;
+
+    /* Locate both reference blocks, padding each one when necessary. */
+    for (i = 0; i < 2; i++) {
+        const Mv *const mv = &current_mv->mv[i];
+        const int xi = x_off + (mv->x >> (2 + hshift));
+        const int yi = y_off + (mv->y >> (2 + vshift));
+
+        frac_x[i] = av_mod_uintp2(mv->x, 2 + hshift);
+        frac_y[i] = av_mod_uintp2(mv->y, 2 + vshift);
+        filt_x[i] = frac_x[i] << (1 - hshift);
+        filt_y[i] = frac_y[i] << (1 - vshift);
+
+        stride[i] = refs[i]->linesize[plane];
+        src[i]    = refs[i]->data[plane];
+        src[i]   += yi * stride[i] + (int)((unsigned)xi << px_shift);
+
+        if (xi < EPEL_EXTRA_BEFORE || yi < EPEL_EXTRA_AFTER ||
+            xi >= pic_w - block_w - EPEL_EXTRA_AFTER ||
+            yi >= pic_h - block_h - EPEL_EXTRA_AFTER) {
+            const int emu_stride = EDGE_EMU_BUFFER_STRIDE << px_shift;
+            int src_back = EPEL_EXTRA_BEFORE * (stride[i]  + (1 << px_shift));
+            int buf_fwd  = EPEL_EXTRA_BEFORE * (emu_stride + (1 << px_shift));
+
+            s->vdsp.emulated_edge_mc(emu_buf[i], src[i] - src_back,
+                                     emu_stride, stride[i],
+                                     block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
+                                     xi - EPEL_EXTRA_BEFORE,
+                                     yi - EPEL_EXTRA_BEFORE,
+                                     pic_w, pic_h);
+
+            src[i]    = emu_buf[i] + buf_fwd;
+            stride[i] = emu_stride;
+        }
+    }
+
+    /* First reference goes to the intermediate buffer ... */
+    s->hevcdsp.put_hevc_epel[size_idx][!!frac_y[0]][!!frac_x[0]](lc->tmp, src[0], stride[0],
+                                                                 block_h, filt_x[0], filt_y[0], block_w);
+
+    /* ... and is combined with the second one while writing to dst0. */
+    if (use_weights) {
+        const int idx0 = current_mv->ref_idx[0];
+        const int idx1 = current_mv->ref_idx[1];
+
+        s->hevcdsp.put_hevc_epel_bi_w[size_idx][!!frac_y[1]][!!frac_x[1]](dst0, s->cur_frame->f->linesize[plane],
+                                                                          src[1], stride[1], lc->tmp,
+                                                                          block_h,
+                                                                          s->sh.chroma_log2_weight_denom,
+                                                                          s->sh.chroma_weight_l0[idx0][cidx],
+                                                                          s->sh.chroma_weight_l1[idx1][cidx],
+                                                                          s->sh.chroma_offset_l0[idx0][cidx],
+                                                                          s->sh.chroma_offset_l1[idx1][cidx],
+                                                                          filt_x[1], filt_y[1], block_w);
+    } else {
+        s->hevcdsp.put_hevc_epel_bi[size_idx][!!frac_y[1]][!!frac_x[1]](dst0, s->cur_frame->f->linesize[plane],
+                                                                        src[1], stride[1], lc->tmp,
+                                                                        block_h, filt_x[1], filt_y[1], block_w);
+    }
+}
+
+/**
+ * Wait until the reference frame has progressed far enough to be read by
+ * motion compensation of the current block.
+ *
+ * Does nothing unless frame threading is active.
+ *
+ * @param s      decoder context (only threads_type is read)
+ * @param ref    reference frame whose progress is awaited
+ * @param mv     motion vector of the block; only the vertical component is used
+ * @param y0     vertical position of the block
+ * @param height height of the block
+ */
+static void hevc_await_progress(const HEVCContext *s, const HEVCFrame *ref,
+                                const Mv *mv, int y0, int height)
+{
+    int last_row;
+
+    if (s->threads_type != FF_THREAD_FRAME)
+        return;
+
+    /* mv->y >> 2 keeps the integer part of the vertical MV (presumably
+     * quarter-sample units); the fixed 9-row margin is taken as-is from the
+     * original code. */
+    last_row = (mv->y >> 2) + y0 + height + 9;
+
+    ff_progress_frame_await(&ref->tf, FFMAX(0, last_row));
+}
+
+/**
+ * Parse the explicitly coded (non-merge) motion data of one prediction unit
+ * and build the final motion vectors in *mv.
+ *
+ * For every reference list in use this reads, in bitstream order: the
+ * reference index (only if the list has entries), the motion vector
+ * difference and the predictor flag; the predictor chosen by
+ * ff_hevc_luma_mv_mvp_mode() is then offset by the decoded difference.
+ *
+ * @param lc           per-thread decoding context
+ * @param x0, y0       position of the prediction unit
+ * @param nPbW, nPbH   size of the prediction unit
+ * @param log2_cb_size log2 of the enclosing coding block size
+ * @param part_idx     index of the partition inside the coding unit
+ * @param merge_idx    forwarded unchanged to ff_hevc_luma_mv_mvp_mode()
+ * @param mv           output motion field (pred_flag, ref_idx[], mv[])
+ */
+static void hevc_luma_mv_mvp_mode(HEVCLocalContext *lc, int x0, int y0, int nPbW,
+                                  int nPbH, int log2_cb_size, int part_idx,
+                                  int merge_idx, MvField *mv)
+{
+    const HEVCContext *const s = lc->parent;
+    enum InterPredIdc pred_idc = PRED_L0;
+    int uses_l0, uses_l1;
+    int predictor_flag;
+
+    ff_hevc_set_neighbour_available(lc, x0, y0, nPbW, nPbH);
+    mv->pred_flag = 0;
+
+    /* the prediction direction is only coded in B slices */
+    if (s->sh.slice_type == HEVC_SLICE_B)
+        pred_idc = ff_hevc_inter_pred_idc_decode(lc, nPbW, nPbH);
+
+    uses_l0 = pred_idc != PRED_L1;
+    uses_l1 = pred_idc != PRED_L0;
+
+    if (uses_l0) {
+        if (s->sh.nb_refs[L0])
+            mv->ref_idx[0] = ff_hevc_ref_idx_lx_decode(lc, s->sh.nb_refs[L0]);
+
+        mv->pred_flag = PF_L0;
+        ff_hevc_hls_mvd_coding(lc, x0, y0, 0);
+        predictor_flag = ff_hevc_mvp_lx_flag_decode(lc);
+        ff_hevc_luma_mv_mvp_mode(lc, x0, y0, nPbW, nPbH, log2_cb_size,
+                                 part_idx, merge_idx, mv, predictor_flag, 0);
+        mv->mv[0].x += lc->pu.mvd.x;
+        mv->mv[0].y += lc->pu.mvd.y;
+    }
+
+    if (uses_l1) {
+        if (s->sh.nb_refs[L1])
+            mv->ref_idx[1] = ff_hevc_ref_idx_lx_decode(lc, s->sh.nb_refs[L1]);
+
+        /* with mvd_l1_zero_flag the L1 difference of a bi-predicted PU is
+         * not coded and is taken as zero */
+        if (inter_pred_is_bi_zero_mvd(pred_idc == PRED_BI, s->sh.mvd_l1_zero_flag == 1)) {
+            AV_ZERO32(&lc->pu.mvd);
+        } else {
+            ff_hevc_hls_mvd_coding(lc, x0, y0, 1);
+        }
+
+        mv->pred_flag += PF_L1;
+        predictor_flag = ff_hevc_mvp_lx_flag_decode(lc);
+        ff_hevc_luma_mv_mvp_mode(lc, x0, y0, nPbW, nPbH, log2_cb_size,
+                                 part_idx, merge_idx, mv, predictor_flag, 1);
+        mv->mv[1].x += lc->pu.mvd.x;
+        mv->mv[1].y += lc->pu.mvd.y;
+    }
+}
+
+/**
+ * Decode one inter prediction unit: obtain its motion data (merge or
+ * explicit), store it in the frame's motion field table and run luma and
+ * chroma motion compensation into the current frame.
+ *
+ * Returns early, without predicting, when a required reference frame is
+ * missing.
+ *
+ * @param lc           per-thread decoding context
+ * @param x0, y0       position of the prediction unit
+ * @param nPbW, nPbH   size of the prediction unit
+ * @param log2_cb_size log2 of the enclosing coding block size
+ * @param partIdx      index of the partition inside the coding unit
+ * @param idx          not used by this function
+ */
+static void hls_prediction_unit(HEVCLocalContext *lc, int x0, int y0,
+                                int nPbW, int nPbH,
+                                int log2_cb_size, int partIdx, int idx)
+{
+#define POS(c_idx, x, y)                                                              \
+    &s->cur_frame->f->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * linesize[c_idx] + \
+                           (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
+    const HEVCContext *const s = lc->parent;
+    const int *linesize        = s->cur_frame->f->linesize;
+    const RefPicList *rpl      = s->cur_frame->refPicList;
+    MvField *mvf_tab           = s->cur_frame->tab_mvf;
+    const HEVCFrame *ref0 = NULL, *ref1 = NULL;
+    struct MvField current_mv  = {{{ 0 }}};
+    uint8_t *dst0 = POS(0, x0, y0);
+    uint8_t *dst1 = POS(1, x0, y0);
+    uint8_t *dst2 = POS(2, x0, y0);
+
+    const int pu_shift     = s->ps.sps->log2_min_pu_size;
+    const int min_pu_width = s->ps.sps->min_pu_width;
+    const int cb_shift     = s->ps.sps->log2_min_cb_size;
+    /* not referenced by name below; presumably picked up by SAMPLE_CTB() */
+    int min_cb_width       = s->ps.sps->min_cb_width;
+    const int cb_col       = x0 >> cb_shift;
+    const int cb_row       = y0 >> cb_shift;
+    const int skipped      = SAMPLE_CTB(s->skip_flag, cb_col, cb_row);
+
+    int merge_idx = 0;
+    int first_pu, pu_rows, pu_cols, row, col;
+    int x0_c, y0_c, nPbW_c, nPbH_c;
+
+    /* a skipped CU is always merged, so its merge_flag is not coded */
+    if (!skipped)
+        lc->pu.merge_flag = ff_hevc_merge_flag_decode(lc);
+
+    if (skipped || lc->pu.merge_flag) {
+        merge_idx = s->sh.max_num_merge_cand > 1 ? ff_hevc_merge_idx_decode(lc) : 0;
+
+        ff_hevc_luma_mv_merge_mode(lc, x0, y0, nPbW, nPbH, log2_cb_size,
+                                   partIdx, merge_idx, &current_mv);
+    } else {
+        hevc_luma_mv_mvp_mode(lc, x0, y0, nPbW, nPbH, log2_cb_size,
+                              partIdx, merge_idx, &current_mv);
+    }
+
+    /* replicate the motion data over every min-PU cell of this PU */
+    first_pu = (y0 >> pu_shift) * min_pu_width + (x0 >> pu_shift);
+    pu_rows  = nPbH >> pu_shift;
+    pu_cols  = nPbW >> pu_shift;
+    for (row = 0; row < pu_rows; row++) {
+        MvField *line = &mvf_tab[first_pu + row * min_pu_width];
+
+        for (col = 0; col < pu_cols; col++)
+            line[col] = current_mv;
+    }
+
+    /* resolve the reference pictures and wait for them to be decoded */
+    if (current_mv.pred_flag & PF_L0) {
+        ref0 = rpl[0].ref[current_mv.ref_idx[0]];
+        if (!ref0 || !ref0->f)
+            return;
+        hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
+    }
+    if (current_mv.pred_flag & PF_L1) {
+        ref1 = rpl[1].ref[current_mv.ref_idx[1]];
+        if (!ref1 || !ref1->f)
+            return;
+        hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
+    }
+
+    /* chroma block geometry, derived with the subsampling shifts of plane 1 */
+    x0_c   = x0   >> s->ps.sps->hshift[1];
+    y0_c   = y0   >> s->ps.sps->vshift[1];
+    nPbW_c = nPbW >> s->ps.sps->hshift[1];
+    nPbH_c = nPbH >> s->ps.sps->vshift[1];
+
+    switch (current_mv.pred_flag) {
+    case PF_L0: {
+        const int r = current_mv.ref_idx[0];
+
+        luma_mc_uni(lc, dst0, linesize[0], ref0->f,
+                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
+                    s->sh.luma_weight_l0[r],
+                    s->sh.luma_offset_l0[r]);
+
+        if (s->ps.sps->chroma_format_idc) {
+            chroma_mc_uni(lc, dst1, linesize[1], ref0->f->data[1], ref0->f->linesize[1],
+                          0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                          s->sh.chroma_weight_l0[r][0], s->sh.chroma_offset_l0[r][0]);
+            chroma_mc_uni(lc, dst2, linesize[2], ref0->f->data[2], ref0->f->linesize[2],
+                          0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                          s->sh.chroma_weight_l0[r][1], s->sh.chroma_offset_l0[r][1]);
+        }
+        break;
+    }
+    case PF_L1: {
+        const int r = current_mv.ref_idx[1];
+
+        luma_mc_uni(lc, dst0, linesize[0], ref1->f,
+                    &current_mv.mv[1], x0, y0, nPbW, nPbH,
+                    s->sh.luma_weight_l1[r],
+                    s->sh.luma_offset_l1[r]);
+
+        if (s->ps.sps->chroma_format_idc) {
+            chroma_mc_uni(lc, dst1, linesize[1], ref1->f->data[1], ref1->f->linesize[1],
+                          1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                          s->sh.chroma_weight_l1[r][0], s->sh.chroma_offset_l1[r][0]);
+            chroma_mc_uni(lc, dst2, linesize[2], ref1->f->data[2], ref1->f->linesize[2],
+                          1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                          s->sh.chroma_weight_l1[r][1], s->sh.chroma_offset_l1[r][1]);
+        }
+        break;
+    }
+    case PF_BI:
+        luma_mc_bi(lc, dst0, linesize[0], ref0->f,
+                   &current_mv.mv[0], x0, y0, nPbW, nPbH,
+                   ref1->f, &current_mv.mv[1], &current_mv);
+
+        if (s->ps.sps->chroma_format_idc) {
+            chroma_mc_bi(lc, dst1, linesize[1], ref0->f, ref1->f,
+                         x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
+            chroma_mc_bi(lc, dst2, linesize[2], ref0->f, ref1->f,
+                         x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
+        }
+        break;
+    }
+}
+
+/**
+ * 8.4.1
+ *
+ * Derive the luma intra prediction mode of one prediction block and record
+ * it in the per-picture tables.
+ *
+ * Three candidate modes are built from the modes stored in tab_ipm for the
+ * left and upper neighbours (INTRA_DC when a neighbour may not be used). The
+ * final mode is either candidate[lc->pu.mpm_idx] or is reconstructed from
+ * lc->pu.rem_intra_luma_pred_mode by stepping over the sorted candidates.
+ *
+ * Side effects: fills the tab_ipm entries covered by the block with the
+ * resulting mode and sets pred_flag of the matching tab_mvf entries to
+ * PF_INTRA.
+ *
+ * @param lc       per-thread decoding context (reads pu.mpm_idx or
+ *                 pu.rem_intra_luma_pred_mode, ctb_up_flag, ctb_left_flag)
+ * @param x0, y0   position of the prediction block
+ * @param pu_size  size of the (square) prediction block
+ * @param prev_intra_luma_pred_flag non-zero to select one of the candidates
+ *                 through mpm_idx, zero to use the remaining-mode syntax
+ * @return the derived intra prediction mode
+ */
+static int luma_intra_pred_mode(HEVCLocalContext *lc, int x0, int y0, int pu_size,
+                                int prev_intra_luma_pred_flag)
+{
+    const HEVCContext *const s = lc->parent;
+    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
+    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
+    int min_pu_width     = s->ps.sps->min_pu_width;
+    int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
+    /* offsets of the block inside its CTB */
+    int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
+    int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
+
+    /* a neighbour is read when it lies inside the same CTB (non-zero offset)
+     * or the adjacent CTB is flagged as available */
+    int cand_up   = (lc->ctb_up_flag || y0b) ?
+                    s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
+    int cand_left = (lc->ctb_left_flag || x0b) ?
+                    s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
+
+    /* y coordinate of the first row of the current CTB */
+    int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
+
+    MvField *tab_mvf = s->cur_frame->tab_mvf;
+    int intra_pred_mode;
+    int candidate[3];
+    int i, j;
+
+    // intra_pred_mode prediction does not cross vertical CTB boundaries
+    if ((y0 - 1) < y_ctb)
+        cand_up = INTRA_DC;
+
+    if (cand_left == cand_up) {
+        if (cand_left < 2) {
+            /* both neighbours non-angular: fixed default list */
+            candidate[0] = INTRA_PLANAR;
+            candidate[1] = INTRA_DC;
+            candidate[2] = INTRA_ANGULAR_26;
+        } else {
+            /* angular mode plus its two neighbours, wrapping inside 2..33 */
+            candidate[0] = cand_left;
+            candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
+            candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
+        }
+    } else {
+        candidate[0] = cand_left;
+        candidate[1] = cand_up;
+        /* third entry: first of PLANAR, DC, ANGULAR_26 not already present */
+        if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
+            candidate[2] = INTRA_PLANAR;
+        } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
+            candidate[2] = INTRA_DC;
+        } else {
+            candidate[2] = INTRA_ANGULAR_26;
+        }
+    }
+
+    if (prev_intra_luma_pred_flag) {
+        intra_pred_mode = candidate[lc->pu.mpm_idx];
+    } else {
+        /* sort the candidates in ascending order; the swap temporary uses
+         * the element type of candidate[] */
+        if (candidate[0] > candidate[1])
+            FFSWAP(int, candidate[0], candidate[1]);
+        if (candidate[0] > candidate[2])
+            FFSWAP(int, candidate[0], candidate[2]);
+        if (candidate[1] > candidate[2])
+            FFSWAP(int, candidate[1], candidate[2]);
+
+        /* the remaining-mode index skips the candidate modes */
+        intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
+        for (i = 0; i < 3; i++)
+            if (intra_pred_mode >= candidate[i])
+                intra_pred_mode++;
+    }
+
+    /* write the intra prediction units into the mv array */
+    if (!size_in_pus)
+        size_in_pus = 1;
+    for (i = 0; i < size_in_pus; i++) {
+        memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
+               intra_pred_mode, size_in_pus);
+
+        for (j = 0; j < size_in_pus; j++) {
+            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
+        }
+    }
+
+    return intra_pred_mode;
+}
+
+/**
+ * Store ct_depth in every tab_ct_depth cell (one per minimum coding block)
+ * covered by the coding block of size (1 << log2_cb_size) at (x0, y0).
+ */
+static av_always_inline void set_ct_depth(const HEVCContext *s, int x0, int y0,
+                                          int log2_cb_size, int ct_depth)
+{
+    const int cb_shift = s->ps.sps->log2_min_cb_size;
+    const int stride   = s->ps.sps->min_cb_width;
+    const int span     = (1 << log2_cb_size) >> cb_shift;
+    int cell           = (y0 >> cb_shift) * stride + (x0 >> cb_shift);
+    int row;
+
+    /* one memset per row of min-CB cells */
+    for (row = 0; row < span; row++, cell += stride)
+        memset(&s->tab_ct_depth[cell], ct_depth, span);
+}
+
+static const uint8_t tab_mode_idx[] = { /* Intra mode remapping table with 35
+     * entries. intra_prediction_unit() indexes it with the derived chroma mode
+     * when chroma_format_idc == 2 and stores the looked-up value as the
+     * chroma prediction mode.
+     */
+     0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
+    21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
+
+/*
+ * Turn a decoded intra_chroma_pred_mode value into a prediction mode:
+ * value 4 reuses the luma mode; otherwise the table entry is used, replaced
+ * by mode 34 when it would coincide with the luma mode.
+ */
+static int intra_chroma_mode_from_luma(int chroma_mode, int luma_mode)
+{
+    static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
+
+    if (chroma_mode == 4)
+        return luma_mode;
+    if (luma_mode == intra_chroma_table[chroma_mode])
+        return 34;
+    return intra_chroma_table[chroma_mode];
+}
+
+/**
+ * Parse the intra prediction syntax of one coding unit and derive the luma
+ * and chroma prediction modes into lc->pu.
+ *
+ * With PART_NxN the unit is handled as 2x2 prediction blocks, otherwise as a
+ * single block. All prev_intra_luma_pred_flag values are read first, then
+ * the per-block mode syntax, then the chroma mode(s), whose layout depends
+ * on chroma_format_idc.
+ *
+ * @param lc           per-thread decoding context
+ * @param x0, y0       position of the coding unit
+ * @param log2_cb_size log2 of the coding block size
+ */
+static void intra_prediction_unit(HEVCLocalContext *lc, int x0, int y0,
+                                  int log2_cb_size)
+{
+    const HEVCContext *const s = lc->parent;
+    uint8_t prev_intra_luma_pred_flag[4];
+    const int split   = lc->cu.part_mode == PART_NxN;
+    const int pb_size = (1 << log2_cb_size) >> split;
+    const int side    = split + 1;
+    int chroma_mode;
+    int row, col, n;
+
+    for (row = 0; row < side; row++)
+        for (col = 0; col < side; col++)
+            prev_intra_luma_pred_flag[2 * row + col] = ff_hevc_prev_intra_luma_pred_flag_decode(lc);
+
+    for (row = 0; row < side; row++) {
+        for (col = 0; col < side; col++) {
+            n = 2 * row + col;
+
+            if (prev_intra_luma_pred_flag[n])
+                lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(lc);
+            else
+                lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(lc);
+
+            lc->pu.intra_pred_mode[n] =
+                luma_intra_pred_mode(lc, x0 + pb_size * col, y0 + pb_size * row, pb_size,
+                                     prev_intra_luma_pred_flag[n]);
+        }
+    }
+
+    switch (s->ps.sps->chroma_format_idc) {
+    case 3:
+        /* one chroma mode per prediction block */
+        for (row = 0; row < side; row++) {
+            for (col = 0; col < side; col++) {
+                n = 2 * row + col;
+
+                chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(lc);
+                lc->pu.chroma_mode_c[n]     = chroma_mode;
+                lc->pu.intra_pred_mode_c[n] =
+                    intra_chroma_mode_from_luma(chroma_mode, lc->pu.intra_pred_mode[n]);
+            }
+        }
+        break;
+    case 2:
+        /* single chroma mode, remapped through tab_mode_idx */
+        chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(lc);
+        lc->pu.chroma_mode_c[0]     = chroma_mode;
+        lc->pu.intra_pred_mode_c[0] =
+            tab_mode_idx[intra_chroma_mode_from_luma(chroma_mode, lc->pu.intra_pred_mode[0])];
+        break;
+    case 0:
+        /* no chroma mode syntax is read */
+        break;
+    default:
+        chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(lc);
+        lc->pu.intra_pred_mode_c[0] =
+            intra_chroma_mode_from_luma(chroma_mode, lc->pu.intra_pred_mode[0]);
+        break;
+    }
+}
+
+/**
+ * Reset the stored intra mode of every min-PU cell covered by the coding
+ * block to INTRA_DC; when the coding unit is intra coded, additionally mark
+ * the matching motion field entries as PF_INTRA.
+ *
+ * @param lc           per-thread decoding context
+ * @param x0, y0       position of the coding block
+ * @param log2_cb_size log2 of the coding block size
+ */
+static void intra_prediction_unit_default_value(HEVCLocalContext *lc,
+                                                int x0, int y0,
+                                                int log2_cb_size)
+{
+    const HEVCContext *const s = lc->parent;
+    const int pu_shift = s->ps.sps->log2_min_pu_size;
+    const int stride   = s->ps.sps->min_pu_width;
+    const int first    = (y0 >> pu_shift) * stride + (x0 >> pu_shift);
+    MvField *mvf_tab   = s->cur_frame->tab_mvf;
+    int span           = (1 << log2_cb_size) >> pu_shift;
+    int row, col;
+
+    /* a block smaller than one min-PU cell still occupies one cell */
+    if (!span)
+        span = 1;
+
+    for (row = 0; row < span; row++)
+        memset(&s->tab_ipm[first + row * stride], INTRA_DC, span);
+
+    if (lc->cu.pred_mode != MODE_INTRA)
+        return;
+
+    for (row = 0; row < span; row++) {
+        MvField *line = &mvf_tab[first + row * stride];
+
+        for (col = 0; col < span; col++)
+            line[col].pred_flag = PF_INTRA;
+    }
+}
+
+static int hls_coding_unit(HEVCLocalContext *lc, const HEVCContext *s, int x0, int y0, int log2_cb_size)
+{   /*
+     * Parse one coding unit of size (1 << log2_cb_size) at (x0, y0): read its
+     * skip / prediction mode / partitioning syntax, run the prediction units
+     * (or read PCM samples), parse the transform tree, and update the per-CB
+     * skip_flag, qp_y_tab and ct_depth tables.
+     *
+     * Returns 0 on success, or the negative value returned by
+     * hls_pcm_sample() or hls_transform_tree() on failure.
+     */
+    int cb_size          = 1 << log2_cb_size;
+    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
+    int length           = cb_size >> log2_min_cb_size; /* CU size in min-CB cells */
+    int min_cb_width     = s->ps.sps->min_cb_width;
+    int x_cb             = x0 >> log2_min_cb_size;
+    int y_cb             = y0 >> log2_min_cb_size;
+    int idx              = log2_cb_size - 2; /* only forwarded to hls_prediction_unit() */
+    int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1; /* alignment mask tested at the end */
+    int x, y, ret;
+
+    lc->cu.x                = x0;
+    lc->cu.y                = y0;
+    lc->cu.pred_mode        = MODE_INTRA; /* defaults, overridden by the syntax below */
+    lc->cu.part_mode        = PART_2Nx2N;
+    lc->cu.intra_split_flag = 0;
+
+    SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
+    for (x = 0; x < 4; x++)
+        lc->pu.intra_pred_mode[x] = 1;
+    if (s->ps.pps->transquant_bypass_enable_flag) {
+        lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(lc);
+        if (lc->cu.cu_transquant_bypass_flag)
+            set_deblocking_bypass(s, x0, y0, log2_cb_size);
+    } else
+        lc->cu.cu_transquant_bypass_flag = 0;
+
+    if (s->sh.slice_type != HEVC_SLICE_I) { /* the skip flag is only read outside I slices */
+        uint8_t skip_flag = ff_hevc_skip_flag_decode(lc, x0, y0, x_cb, y_cb);
+
+        x = y_cb * min_cb_width + x_cb;
+        for (y = 0; y < length; y++) {
+            memset(&s->skip_flag[x], skip_flag, length); /* one row of min-CB cells */
+            x += min_cb_width;
+        }
+        lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
+    } else {
+        x = y_cb * min_cb_width + x_cb;
+        for (y = 0; y < length; y++) {
+            memset(&s->skip_flag[x], 0, length);
+            x += min_cb_width;
+        }
+    }
+
+    if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) { /* skipped CU: one full-size PU, no further CU syntax */
+        hls_prediction_unit(lc, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
+        intra_prediction_unit_default_value(lc, x0, y0, log2_cb_size);
+
+        if (!s->sh.disable_deblocking_filter_flag)
+            ff_hevc_deblocking_boundary_strengths(lc, x0, y0, log2_cb_size);
+    } else {
+        int pcm_flag = 0;
+
+        if (s->sh.slice_type != HEVC_SLICE_I)
+            lc->cu.pred_mode = ff_hevc_pred_mode_decode(lc);
+        if (lc->cu.pred_mode != MODE_INTRA ||
+            log2_cb_size == s->ps.sps->log2_min_cb_size) { /* intra CUs read part_mode only at the minimum CB size */
+            lc->cu.part_mode        = ff_hevc_part_mode_decode(lc, log2_cb_size);
+            lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
+                                      lc->cu.pred_mode == MODE_INTRA;
+        }
+
+        if (lc->cu.pred_mode == MODE_INTRA) {
+            if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled &&
+                log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
+                log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
+                pcm_flag = ff_hevc_pcm_flag_decode(lc);
+            }
+            if (pcm_flag) {
+                intra_prediction_unit_default_value(lc, x0, y0, log2_cb_size);
+                ret = hls_pcm_sample(lc, x0, y0, log2_cb_size);
+                if (s->ps.sps->pcm_loop_filter_disabled)
+                    set_deblocking_bypass(s, x0, y0, log2_cb_size);
+
+                if (ret < 0) /* checked only after the deblocking bypass has been recorded */
+                    return ret;
+            } else {
+                intra_prediction_unit(lc, x0, y0, log2_cb_size);
+            }
+        } else {
+            intra_prediction_unit_default_value(lc, x0, y0, log2_cb_size);
+            switch (lc->cu.part_mode) { /* one hls_prediction_unit() call per partition */
+            case PART_2Nx2N:
+                hls_prediction_unit(lc, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
+                break;
+            case PART_2NxN:
+                hls_prediction_unit(lc, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
+                hls_prediction_unit(lc, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
+                break;
+            case PART_Nx2N:
+                hls_prediction_unit(lc, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
+                hls_prediction_unit(lc, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
+                break;
+            case PART_2NxnU:
+                hls_prediction_unit(lc, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
+                hls_prediction_unit(lc, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
+                break;
+            case PART_2NxnD:
+                hls_prediction_unit(lc, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
+                hls_prediction_unit(lc, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
+                break;
+            case PART_nLx2N:
+                hls_prediction_unit(lc, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
+                hls_prediction_unit(lc, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
+                break;
+            case PART_nRx2N:
+                hls_prediction_unit(lc, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
+                hls_prediction_unit(lc, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
+                break;
+            case PART_NxN:
+                hls_prediction_unit(lc, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
+                hls_prediction_unit(lc, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
+                hls_prediction_unit(lc, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
+                hls_prediction_unit(lc, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
+                break;
+            }
+        }
+
+        if (!pcm_flag) {
+            int rqt_root_cbf = 1; /* residual assumed present unless the flag below is read */
+
+            if (lc->cu.pred_mode != MODE_INTRA &&
+                !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
+                rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(lc);
+            }
+            if (rqt_root_cbf) {
+                const static int cbf[2] = { 0 }; /* all-zero array passed twice to the root hls_transform_tree() call */
+                lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
+                                         s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
+                                         s->ps.sps->max_transform_hierarchy_depth_inter;
+                ret = hls_transform_tree(lc, x0, y0, x0, y0, x0, y0,
+                                         log2_cb_size,
+                                         log2_cb_size, 0, 0, cbf, cbf);
+                if (ret < 0)
+                    return ret;
+            } else {
+                if (!s->sh.disable_deblocking_filter_flag)
+                    ff_hevc_deblocking_boundary_strengths(lc, x0, y0, log2_cb_size);
+            }
+        }
+    }
+
+    if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0) /* no QP delta was coded: set the QP here */
+        ff_hevc_set_qPy(lc, x0, y0, log2_cb_size);
+
+    x = y_cb * min_cb_width + x_cb;
+    for (y = 0; y < length; y++) {
+        memset(&s->qp_y_tab[x], lc->qp_y, length); /* record the CU's QP for every covered min-CB cell */
+        x += min_cb_width;
+    }
+
+    if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
+       ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) { /* CU ends on a qp_block_mask-aligned position in x and y */
+        lc->qPy_pred = lc->qp_y;
+    }
+
+    set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
+
+    return 0;
+}
+
+/**
+ * Recursively parse the coding quadtree rooted at (x0, y0).
+ *
+ * A block that lies fully inside the picture and is larger than the minimum
+ * coding block size reads an explicit split flag; otherwise the split is
+ * implied (split whenever the block is larger than the minimum size).
+ *
+ * @param lc           per-thread decoding context
+ * @param x0, y0       position of the block
+ * @param log2_cb_size log2 of the block size
+ * @param cb_depth     current quadtree depth
+ * @return a negative error code on failure, 0 when no further data follows,
+ *         non-zero when parsing should continue
+ */
+static int hls_coding_quadtree(HEVCLocalContext *lc, int x0, int y0,
+                               int log2_cb_size, int cb_depth)
+{
+    const HEVCContext *const s = lc->parent;
+    const int cb_size = 1 << log2_cb_size;
+    const int x_end   = x0 + cb_size;
+    const int y_end   = y0 + cb_size;
+    int split_cu;
+
+    lc->ct_depth = cb_depth;
+
+    if (x_end <= s->ps.sps->width && y_end <= s->ps.sps->height &&
+        log2_cb_size > s->ps.sps->log2_min_cb_size)
+        split_cu = ff_hevc_split_coding_unit_flag_decode(lc, cb_depth, x0, y0);
+    else
+        split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
+
+    /* blocks at least as large as a QP group restart the delta-QP state */
+    if (s->ps.pps->cu_qp_delta_enabled_flag &&
+        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
+        lc->tu.is_cu_qp_delta_coded = 0;
+        lc->tu.cu_qp_delta          = 0;
+    }
+
+    if (s->sh.cu_chroma_qp_offset_enabled_flag &&
+        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth)
+        lc->tu.is_cu_chroma_qp_offset_coded = 0;
+
+    if (!split_cu) {
+        /* leaf: decode the coding unit itself */
+        int at_ctb_right, at_ctb_bottom;
+        int ret = hls_coding_unit(lc, s, x0, y0, log2_cb_size);
+
+        if (ret < 0)
+            return ret;
+
+        at_ctb_right  = !(x_end % (1 << (s->ps.sps->log2_ctb_size))) ||
+                        x_end >= s->ps.sps->width;
+        at_ctb_bottom = !(y_end % (1 << (s->ps.sps->log2_ctb_size))) ||
+                        y_end >= s->ps.sps->height;
+
+        /* end_of_slice_flag is only read after the last CU of a CTB */
+        if (at_ctb_right && at_ctb_bottom)
+            return !ff_hevc_end_of_slice_flag_decode(lc);
+
+        return 1;
+    } else {
+        const int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
+        const int half = cb_size >> 1;
+        const int x1   = x0 + half;
+        const int y1   = y0 + half;
+        int more_data  = 0;
+        int child;
+
+        /* children in z-order; the first is always parsed, the others only
+         * while data remains and their origin lies inside the picture */
+        for (child = 0; child < 4; child++) {
+            const int right  = child & 1;
+            const int bottom = child >> 1;
+
+            if (child && (!more_data ||
+                          (right  && !(x1 < s->ps.sps->width)) ||
+                          (bottom && !(y1 < s->ps.sps->height))))
+                continue;
+
+            more_data = hls_coding_quadtree(lc, right ? x1 : x0, bottom ? y1 : y0,
+                                            log2_cb_size - 1, cb_depth + 1);
+            if (more_data < 0)
+                return more_data;
+        }
+
+        if ((x_end & qp_block_mask) == 0 &&
+            (y_end & qp_block_mask) == 0)
+            lc->qPy_pred = lc->qp_y;
+
+        if (!more_data)
+            return 0;
+
+        return ((x1 + half) < s->ps.sps->width ||
+                (y1 + half) < s->ps.sps->height);
+    }
+}
+
+static void hls_decode_neighbour(HEVCLocalContext *lc, int x_ctb, int y_ctb,
+                                 int ctb_addr_ts)
+{
+    const HEVCContext *const s = lc->parent;
+    int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
+    int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+    int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
+
+    s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
+
+    if (s->ps.pps->entropy_coding_sync_enabled_flag) {
+        if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
+            lc->first_qp_group = 1;
+        lc->end_of_tiles_x = s->ps.sps->width;
+    } else if (s->ps.pps->tiles_enabled_flag) {
+        if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
+            int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
+            lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
+            lc->first_qp_group   = 1;
+        }
+    } else {
+        lc->end_of_tiles_x = s->ps.sps->width;
+    }
+
+    lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
+
+    lc->boundary_flags = 0;
+    if (s->ps.pps->tiles_enabled_flag) {
+        if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
+            lc->boundary_flags |= BOUNDARY_LEFT_TILE;
+        if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
+            lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
+        if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
+            lc->boundary_flags |= BOUNDARY_UPPER_TILE;
+        if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
+            lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
+    } else {
+        if (ctb_addr_in_slice <= 0)
+            lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
+        if (ctb_addr_in_slice < s->ps.sps->ctb_width)
+            lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
+    }
+
+    lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
+    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
+    lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
+    lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
+}
+
+/**
+ * Decode the CTBs of the current slice segment sequentially, in tile scan
+ * order, starting at the CTB given by the slice header.
+ *
+ * A dependent slice segment is rejected when it would be the first CTB in
+ * tile scan order or when the preceding CTB does not carry the same slice
+ * address. On a per-CTB failure the CTB's slice address entry is set to -1
+ * before the error is returned.
+ *
+ * @param s  decoder context
+ * @param gb bit reader holding the slice data; the payload starts
+ *           s->sh.data_offset bytes into its buffer
+ * @return the tile-scan address following the last decoded CTB, or a
+ *         negative error code
+ */
+static int hls_decode_entry(HEVCContext *s, GetBitContext *gb)
+{
+    HEVCLocalContext *const lc = &s->local_ctx[0];
+    const uint8_t *slice_data  = gb->buffer + s->sh.data_offset;
+    const size_t   slice_size  = gb->buffer_end - gb->buffer - s->sh.data_offset;
+    int ctb_size     = 1 << s->ps.sps->log2_ctb_size;
+    int ctbs_per_row = (s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size;
+    int ctb_addr_ts  = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
+    int x_ctb = 0, y_ctb = 0;
+    int more_data = 1;
+    int ret;
+
+    if (s->sh.dependent_slice_segment_flag) {
+        int prev_rs;
+
+        if (!ctb_addr_ts) {
+            av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
+        if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
+            av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
+        int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+
+        /* raster address -> sample position of the CTB */
+        x_ctb = (ctb_addr_rs % ctbs_per_row) << s->ps.sps->log2_ctb_size;
+        y_ctb = (ctb_addr_rs / ctbs_per_row) << s->ps.sps->log2_ctb_size;
+        hls_decode_neighbour(lc, x_ctb, y_ctb, ctb_addr_ts);
+
+        ret = ff_hevc_cabac_init(lc, ctb_addr_ts, slice_data, slice_size);
+        if (ret < 0) {
+            s->tab_slice_address[ctb_addr_rs] = -1;
+            return ret;
+        }
+
+        hls_sao_param(lc, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
+
+        /* per-CTB copies of the slice-level loop filter settings */
+        s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
+        s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
+        s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
+
+        more_data = hls_coding_quadtree(lc, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
+        if (more_data < 0) {
+            s->tab_slice_address[ctb_addr_rs] = -1;
+            return more_data;
+        }
+
+        ctb_addr_ts++;
+        ff_hevc_save_states(lc, ctb_addr_ts);
+        ff_hevc_hls_filters(lc, x_ctb, y_ctb, ctb_size);
+    }
+
+    /* the last CTB handled was the bottom-right one of the picture */
+    if (x_ctb + ctb_size >= s->ps.sps->width &&
+        y_ctb + ctb_size >= s->ps.sps->height)
+        ff_hevc_hls_filter(lc, x_ctb, y_ctb, ctb_size);
+
+    return ctb_addr_ts;
+}
+
+/**
+ * Job callback that decodes one CTB row of a slice with entry points.
+ *
+ * The job index selects the row (and thereby the entry in s->sh.offset /
+ * s->sh.size that holds the row's data), self_id selects the local context
+ * to work with.
+ *
+ * @param avctxt      codec context (not used by the body)
+ * @param hevc_lclist array of HEVCLocalContext, indexed by self_id
+ * @param job         index of the CTB row handled by this call
+ * @param self_id     index of the local context to use
+ * @return the ctb_addr_ts following the last CTB of the picture when this
+ *         row reaches it, 0 when the row ends earlier or when an error was
+ *         flagged through wpp_err, a negative error code when decoding
+ *         of this row fails
+ */
+static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *hevc_lclist,
+                                int job, int self_id)
+{
+    HEVCLocalContext *lc = &((HEVCLocalContext*)hevc_lclist)[self_id];
+    const HEVCContext *const s = lc->parent;
+    int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
+    int more_data   = 1;
+    int ctb_row = job;
+    /* raster-scan address of the first CTB of this row: slice start plus
+     * ctb_row picture-width-in-CTBs strides */
+    int ctb_addr_rs = s->sh.slice_ctb_addr_rs + ctb_row * ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size);
+    int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
+    /* index passed to the progress await/report helpers below */
+    int thread = ctb_row % s->threads_number;
+
+    const uint8_t *data      = s->data + s->sh.offset[ctb_row];
+    const size_t   data_size = s->sh.size[ctb_row];
+
+    int ret;
+
+    /* every row but the first initialises the CABAC decoder on its own
+     * chunk of the slice data */
+    if (ctb_row)
+        ff_init_cabac_decoder(&lc->cc, data, data_size);
+
+    while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
+        int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
+        int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
+
+        hls_decode_neighbour(lc, x_ctb, y_ctb, ctb_addr_ts);
+
+        /* NOTE(review): presumably blocks until the row above is
+         * SHIFT_CTB_WPP CTBs ahead of this one - confirm against
+         * ff_thread_await_progress2() */
+        ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
+
+        /* atomic_load's prototype requires a pointer to non-const atomic variable
+         * (due to implementations via mutexes, where reads involve writes).
+         * Of course, casting const away here is nevertheless safe. */
+        if (atomic_load((atomic_int*)&s->wpp_err)) {
+            /* an error has been flagged: still report progress before
+             * bailing out (presumably so that no other row keeps waiting
+             * on this one) */
+            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
+            return 0;
+        }
+
+        ret = ff_hevc_cabac_init(lc, ctb_addr_ts, data, data_size);
+        if (ret < 0)
+            goto error;
+        hls_sao_param(lc, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
+        more_data = hls_coding_quadtree(lc, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
+
+        if (more_data < 0) {
+            ret = more_data;
+            goto error;
+        }
+
+        ctb_addr_ts++;
+
+        ff_hevc_save_states(lc, ctb_addr_ts);
+        ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
+        ff_hevc_hls_filters(lc, x_ctb, y_ctb, ctb_size);
+
+        /* the slice data ended before the end of the row although this is
+         * not the row of the last entry point: flag an error */
+        if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
+            /* Casting const away here is safe, because it is an atomic operation. */
+            atomic_store((atomic_int*)&s->wpp_err, 1);
+            ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
+            return 0;
+        }
+
+        /* bottom-right CTB of the picture: run the final filter pass and
+         * hand the resulting tile-scan address back to the caller */
+        if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
+            ff_hevc_hls_filter(lc, x_ctb, y_ctb, ctb_size);
+            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
+            return ctb_addr_ts;
+        }
+        ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+        x_ctb+=ctb_size;
+
+        /* the next CTB would start beyond the picture width: row done */
+        if(x_ctb >= s->ps.sps->width) {
+            break;
+        }
+    }
+    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
+
+    return 0;
+error:
+    /* mark the failing CTB, flag the error for the other rows and report
+     * progress before returning the error code */
+    s->tab_slice_address[ctb_addr_rs] = -1;
+    /* Casting const away here is safe, because it is an atomic operation. */
+    atomic_store((atomic_int*)&s->wpp_err, 1);
+    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
+    return ret;
+}
+
+/**
+ * Decode the data of a slice that carries entry points, one job per entry
+ * point row.
+ *
+ * Fills s->sh.offset[] / s->sh.size[] with the position and length of each
+ * row's chunk inside the NAL payload, makes sure there is one local context
+ * per thread, then dispatches hls_decode_entry_wpp() through execute2().
+ *
+ * @param s   decoder context
+ * @param nal NAL unit holding the slice
+ * @return the sum of the values returned by the individual jobs, or a
+ *         negative error code when setup fails
+ */
+static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
+{
+    const uint8_t *data = nal->data;
+    int length          = nal->size;
+    int *ret;
+    int64_t offset;
+    int64_t startheader, cmpt = 0;
+    int i, j, res = 0;
+
+    /* the first CTB of the last row must lie inside the picture */
+    if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
+        av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
+            s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
+            s->ps.sps->ctb_width, s->ps.sps->ctb_height
+        );
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* grow the local context array to one entry per thread; existing
+     * entries are copied over, new ones are zeroed and linked to s */
+    if (s->threads_number > s->nb_local_ctx) {
+        HEVCLocalContext *tmp = av_malloc_array(s->threads_number, sizeof(*s->local_ctx));
+
+        if (!tmp)
+            return AVERROR(ENOMEM);
+
+        memcpy(tmp, s->local_ctx, sizeof(*s->local_ctx) * s->nb_local_ctx);
+        av_free(s->local_ctx);
+        s->local_ctx = tmp;
+
+        for (unsigned i = s->nb_local_ctx; i < s->threads_number; i++) {
+            tmp = &s->local_ctx[i];
+
+            memset(tmp, 0, sizeof(*tmp));
+
+            tmp->logctx             = s->avctx;
+            tmp->parent             = s;
+            tmp->common_cabac_state = &s->cabac;
+        }
+
+        s->nb_local_ctx = s->threads_number;
+    }
+
+    offset = s->sh.data_offset;
+
+    /* count the skipped bytes that fall inside the first chunk; each hit
+     * shortens the chunk by one byte (cmpt) and moves its end (startheader)
+     * back accordingly.
+     * NOTE(review): skipped_bytes_pos presumably lists the bytes dropped
+     * while unescaping the NAL payload - confirm against h2645_parse */
+    for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
+        if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
+            startheader--;
+            cmpt++;
+        }
+    }
+
+    /* same correction for every following chunk; offset advances by the
+     * corrected length of the previous chunk */
+    for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
+        offset += (s->sh.entry_point_offset[i - 1] - cmpt);
+        for (j = 0, cmpt = 0, startheader = offset
+             + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
+            if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
+                startheader--;
+                cmpt++;
+            }
+        }
+        s->sh.size[i]   = s->sh.entry_point_offset[i] - cmpt;
+        s->sh.offset[i] = offset;
+
+    }
+
+    /* the last chunk takes whatever is left of the NAL payload */
+    offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
+    if (length < offset) {
+        av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
+        return AVERROR_INVALIDDATA;
+    }
+    s->sh.size  [s->sh.num_entry_point_offsets] = length - offset;
+    s->sh.offset[s->sh.num_entry_point_offsets] = offset;
+
+    /* the first chunk runs from the start of the slice data to the start
+     * of the second chunk */
+    s->sh.offset[0] = s->sh.data_offset;
+    s->sh.size[0]   = s->sh.offset[1] - s->sh.offset[0];
+
+    s->data = data;
+
+    /* the additional local contexts start from the QP of context 0 */
+    for (i = 1; i < s->threads_number; i++) {
+        s->local_ctx[i].first_qp_group = 1;
+        s->local_ctx[i].qp_y = s->local_ctx[0].qp_y;
+    }
+
+    atomic_store(&s->wpp_err, 0);
+    res = ff_slice_thread_allocz_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
+    if (res < 0)
+        return res;
+
+    /* one return value slot per job */
+    ret = av_calloc(s->sh.num_entry_point_offsets + 1, sizeof(*ret));
+    if (!ret)
+        return AVERROR(ENOMEM);
+
+    /* the jobs are only run when entropy_coding_sync_enabled_flag is set */
+    if (s->ps.pps->entropy_coding_sync_enabled_flag)
+        s->avctx->execute2(s->avctx, hls_decode_entry_wpp, s->local_ctx, ret, s->sh.num_entry_point_offsets + 1);
+
+    for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
+        res += ret[i];
+
+    av_free(ret);
+    return res;
+}
+
+/**
+ * Attach the side data gathered from SEI messages and Dolby Vision RPUs to
+ * the frame currently being decoded.
+ *
+ * @param s decoder context; s->cur_frame must be set
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int set_side_data(HEVCContext *s)
+{
+    AVFrame *frame = s->cur_frame->f;
+    int err;
+
+    /* An IRAP picture with no_rasl_output_flag=1 uses up one count of the
+     * mastering display / content light level "present" state, so that
+     * this side data persists for the entire coded video sequence. */
+    if (IS_IRAP(s) && s->no_rasl_output_flag) {
+        if (s->sei.common.mastering_display.present > 0)
+            s->sei.common.mastering_display.present -= 1;
+
+        if (s->sei.common.content_light.present > 0)
+            s->sei.common.content_light.present -= 1;
+    }
+
+    err = ff_h2645_sei_to_frame(frame, &s->sei.common, AV_CODEC_ID_HEVC, s->avctx,
+                                &s->ps.sps->vui.common,
+                                s->ps.sps->bit_depth, s->ps.sps->bit_depth_chroma,
+                                s->cur_frame->poc /* no poc_offset in HEVC */);
+    if (err < 0)
+        return err;
+
+    /* SMPTE 12M timecodes: element 0 holds the count, the timecodes follow */
+    if (s->sei.timecode.present) {
+        AVFrameSideData *tc_side;
+
+        err = ff_frame_new_side_data(s->avctx, frame, AV_FRAME_DATA_S12M_TIMECODE,
+                                     4 * sizeof(uint32_t), &tc_side);
+        if (err < 0)
+            return err;
+
+        if (tc_side) {
+            uint32_t *tc = (uint32_t*)tc_side->data;
+            char tc_str[AV_TIMECODE_STR_SIZE];
+
+            tc[0] = s->sei.timecode.num_clock_ts;
+
+            for (int n = 0; n < tc[0]; n++) {
+                tc[n + 1] = av_timecode_get_smpte(s->avctx->framerate,
+                                                  s->sei.timecode.cnt_dropped_flag[n],
+                                                  s->sei.timecode.hours_value[n],
+                                                  s->sei.timecode.minutes_value[n],
+                                                  s->sei.timecode.seconds_value[n],
+                                                  s->sei.timecode.n_frames[n]);
+                av_timecode_make_smpte_tc_string2(tc_str, s->avctx->framerate,
+                                                  tc[n + 1], 0, 0);
+                av_dict_set(&frame->metadata, "timecode", tc_str, 0);
+            }
+        }
+
+        s->sei.timecode.num_clock_ts = 0;
+    }
+
+    /* HDR10+ dynamic metadata: hand a new reference to the frame */
+    if (s->sei.common.dynamic_hdr_plus.info) {
+        AVBufferRef *hdr_plus = av_buffer_ref(s->sei.common.dynamic_hdr_plus.info);
+
+        if (!hdr_plus)
+            return AVERROR(ENOMEM);
+
+        err = ff_frame_new_side_data_from_buf(s->avctx, frame, AV_FRAME_DATA_DYNAMIC_HDR_PLUS, &hdr_plus, NULL);
+        if (err < 0)
+            return err;
+    }
+
+    /* raw Dolby Vision RPU: the buffer reference moves into the frame */
+    if (s->rpu_buf) {
+        if (!av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DOVI_RPU_BUFFER, s->rpu_buf))
+            return AVERROR(ENOMEM);
+
+        s->rpu_buf = NULL;
+    }
+
+    err = ff_dovi_attach_side_data(&s->dovi_ctx, frame);
+    if (err < 0)
+        return err;
+
+    /* HDR Vivid dynamic metadata: the new reference is dropped again when
+     * it cannot be attached */
+    if (s->sei.common.dynamic_hdr_vivid.info) {
+        AVBufferRef *vivid = av_buffer_ref(s->sei.common.dynamic_hdr_vivid.info);
+
+        if (!vivid)
+            return AVERROR(ENOMEM);
+
+        if (!av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DYNAMIC_HDR_VIVID, vivid)) {
+            av_buffer_unref(&vivid);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Prepare the decoder for a new picture: clear the per-picture tables,
+ * allocate the new reference frame, build the frame RPS, attach side data,
+ * set up film grain synthesis and fetch the next frame to output.
+ *
+ * On failure the current frame is unreferenced and s->cur_frame /
+ * s->collocated_ref are reset to NULL.
+ *
+ * @param s decoder context
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int hevc_frame_start(HEVCContext *s)
+{
+    const int cb_cols         = (s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1;
+    const int cb_rows         = (s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1;
+    const int pic_size_in_ctb = cb_cols * cb_rows;
+    int have_fg, fg_unsupported;
+    int ret;
+
+    /* reset the tables that are filled while decoding the picture */
+    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
+    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
+    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
+    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
+    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
+
+    s->is_decoded     = 0;
+    s->first_nal_type = s->nal_unit_type;
+
+    s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) ||
+                             (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
+
+    if (s->ps.pps->tiles_enabled_flag)
+        s->local_ctx[0].end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
+
+    ret = ff_hevc_set_new_ref(s, s->poc);
+    if (ret < 0)
+        goto fail;
+
+    ret = ff_hevc_frame_rps(s);
+    if (ret < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
+        goto fail;
+    }
+
+    /* only IRAP pictures are flagged as keyframes */
+    if (!IS_IRAP(s))
+        s->cur_frame->f->flags &= ~AV_FRAME_FLAG_KEY;
+    else
+        s->cur_frame->f->flags |= AV_FRAME_FLAG_KEY;
+
+    /* film grain is applied by the decoder only when parameters are present,
+     * the caller did not ask for them to be exported, and no hwaccel is used */
+    have_fg = s->sei.common.film_grain_characteristics.present ||
+              s->sei.common.aom_film_grain.enable;
+    s->cur_frame->needs_fg = have_fg &&
+        !(s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) &&
+        !s->avctx->hwaccel;
+
+    ret = set_side_data(s);
+    if (ret < 0)
+        goto fail;
+
+    if (s->cur_frame->needs_fg) {
+        fg_unsupported = s->sei.common.film_grain_characteristics.present &&
+            !ff_h274_film_grain_params_supported(s->sei.common.film_grain_characteristics.model_id,
+                                                 s->cur_frame->f->format);
+        /* the parameter selection is only attempted when the H.274 check
+         * above did not already reject the parameters */
+        if (!fg_unsupported)
+            fg_unsupported = !av_film_grain_params_select(s->cur_frame->f);
+
+        if (fg_unsupported) {
+            av_log_once(s->avctx, AV_LOG_WARNING, AV_LOG_DEBUG, &s->film_grain_warning_shown,
+                        "Unsupported film grain parameters. Ignoring film grain.\n");
+            s->cur_frame->needs_fg = 0;
+        }
+    }
+
+    /* allocate the frame that will receive the grain-synthesized picture */
+    if (s->cur_frame->needs_fg) {
+        s->cur_frame->frame_grain->format = s->cur_frame->f->format;
+        s->cur_frame->frame_grain->width  = s->cur_frame->f->width;
+        s->cur_frame->frame_grain->height = s->cur_frame->f->height;
+
+        ret = ff_thread_get_buffer(s->avctx, s->cur_frame->frame_grain, 0);
+        if (ret < 0)
+            goto fail;
+    }
+
+    /* derive the picture type from the slice type by subtraction from 3 */
+    s->cur_frame->f->pict_type = 3 - s->sh.slice_type;
+
+    if (!IS_IRAP(s))
+        ff_hevc_bump_frame(s);
+
+    av_frame_unref(s->output_frame);
+    ret = ff_hevc_output_frame(s, s->output_frame, 0);
+    if (ret < 0)
+        goto fail;
+
+    if (!s->avctx->hwaccel)
+        ff_thread_finish_setup(s->avctx);
+
+    return 0;
+
+fail:
+    if (s->cur_frame)
+        ff_hevc_unref_frame(s->cur_frame, ~0);
+    s->collocated_ref = NULL;
+    s->cur_frame      = NULL;
+    return ret;
+}
+
+/**
+ * Finish the current picture: when film grain synthesis was requested for
+ * it, apply the selected film grain parameters into frame_grain.
+ *
+ * @param s decoder context; s->cur_frame must be set
+ * @return 0, or AVERROR_BUG when the selected parameters have no type
+ */
+static int hevc_frame_end(HEVCContext *s)
+{
+    HEVCFrame *frame = s->cur_frame;
+    const AVFilmGrainParams *params;
+    av_unused int ret;
+
+    if (!frame->needs_fg)
+        return 0;
+
+    av_assert0(frame->frame_grain->buf[0]);
+    params = av_film_grain_params_select(frame->f);
+
+    if (params->type == AV_FILM_GRAIN_PARAMS_NONE) {
+        av_assert0(0);
+        return AVERROR_BUG;
+    } else if (params->type == AV_FILM_GRAIN_PARAMS_H274) {
+        ret = ff_h274_apply_film_grain(frame->frame_grain, frame->f,
+                                       &s->h274db, params);
+    } else if (params->type == AV_FILM_GRAIN_PARAMS_AV1) {
+        ret = ff_aom_apply_film_grain(frame->frame_grain, frame->f, params);
+    }
+    /* the result of the synthesis is only checked through this assert */
+    av_assert1(ret >= 0);
+
+    return 0;
+}
+
+/**
+ * Decode a single NAL unit.
+ *
+ * Parameter sets and SEI messages are forwarded to the hwaccel (when it
+ * provides a decode_params callback) and then parsed. Slice NAL units have
+ * their header parsed, start a new frame when they are the first slice of
+ * a picture, and are then decoded either by the hwaccel or by the software
+ * slice decoders.
+ *
+ * Errors that jump to the fail label are only returned to the caller when
+ * AV_EF_EXPLODE is set in err_recognition; otherwise the NAL unit is
+ * dropped and 0 is returned. A few paths return their error directly.
+ *
+ * @param s   decoder context
+ * @param nal NAL unit to decode
+ * @return 0 on success or on an ignored error, a negative AVERROR code
+ *         otherwise
+ */
+static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
+{
+    GetBitContext     gb = nal->gb;
+    int ctb_addr_ts, ret;
+
+    s->nal_unit_type = nal->type;
+    s->temporal_id   = nal->temporal_id;
+
+    switch (s->nal_unit_type) {
+    case HEVC_NAL_VPS:
+        if (FF_HW_HAS_CB(s->avctx, decode_params)) {
+            ret = FF_HW_CALL(s->avctx, decode_params,
+                             nal->type, nal->raw_data, nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
+        ret = ff_hevc_decode_nal_vps(&gb, s->avctx, &s->ps);
+        if (ret < 0)
+            goto fail;
+        break;
+    case HEVC_NAL_SPS:
+        if (FF_HW_HAS_CB(s->avctx, decode_params)) {
+            ret = FF_HW_CALL(s->avctx, decode_params,
+                             nal->type, nal->raw_data, nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
+        ret = ff_hevc_decode_nal_sps(&gb, s->avctx, &s->ps,
+                                     s->apply_defdispwin);
+        if (ret < 0)
+            goto fail;
+        break;
+    case HEVC_NAL_PPS:
+        if (FF_HW_HAS_CB(s->avctx, decode_params)) {
+            ret = FF_HW_CALL(s->avctx, decode_params,
+                             nal->type, nal->raw_data, nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
+        ret = ff_hevc_decode_nal_pps(&gb, s->avctx, &s->ps);
+        if (ret < 0)
+            goto fail;
+        break;
+    case HEVC_NAL_SEI_PREFIX:
+    case HEVC_NAL_SEI_SUFFIX:
+        if (FF_HW_HAS_CB(s->avctx, decode_params)) {
+            ret = FF_HW_CALL(s->avctx, decode_params,
+                             nal->type, nal->raw_data, nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        }
+        ret = ff_hevc_decode_nal_sei(&gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
+        if (ret < 0)
+            goto fail;
+        break;
+    case HEVC_NAL_TRAIL_R:
+    case HEVC_NAL_TRAIL_N:
+    case HEVC_NAL_TSA_N:
+    case HEVC_NAL_TSA_R:
+    case HEVC_NAL_STSA_N:
+    case HEVC_NAL_STSA_R:
+    case HEVC_NAL_BLA_W_LP:
+    case HEVC_NAL_BLA_W_RADL:
+    case HEVC_NAL_BLA_N_LP:
+    case HEVC_NAL_IDR_W_RADL:
+    case HEVC_NAL_IDR_N_LP:
+    case HEVC_NAL_CRA_NUT:
+    case HEVC_NAL_RADL_N:
+    case HEVC_NAL_RADL_R:
+    case HEVC_NAL_RASL_N:
+    case HEVC_NAL_RASL_R:
+        ret = hls_slice_header(s, &gb);
+        if (ret < 0)
+            return ret;
+        /* a return value of 1 from the slice header parser is treated as
+         * invalid data */
+        if (ret == 1) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+
+        /* honour the skip_frame setting of the caller */
+        if (
+            (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
+            (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
+            (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
+            break;
+        }
+
+        if (s->sh.first_slice_in_pic_flag) {
+            /* max_ra == INT_MAX means no random access point has been seen
+             * yet: CRA/BLA pictures set it to their POC, IDR pictures to
+             * INT_MIN */
+            if (s->max_ra == INT_MAX) {
+                if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
+                    s->max_ra = s->poc;
+                } else {
+                    if (IS_IDR(s))
+                        s->max_ra = INT_MIN;
+                }
+            }
+
+            /* RASL pictures that do not follow the random access point in
+             * POC order are dropped */
+            if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
+                s->poc <= s->max_ra) {
+                s->is_decoded = 0;
+                break;
+            } else {
+                if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
+                    s->max_ra = INT_MIN;
+            }
+
+            s->overlap ++;
+            ret = hevc_frame_start(s);
+            if (ret < 0)
+                return ret;
+        } else if (!s->cur_frame) {
+            av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
+            /* ret still holds the non-negative result of hls_slice_header()
+             * here; set an error code so that the fail path can report this
+             * condition when AV_EF_EXPLODE is set */
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        /* all VCL NAL units of a picture must share the same type */
+        if (s->nal_unit_type != s->first_nal_type) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Non-matching NAL types of the VCL NALUs: %d %d\n",
+                   s->first_nal_type, s->nal_unit_type);
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* independent non-intra slices build their own reference lists */
+        if (!s->sh.dependent_slice_segment_flag &&
+            s->sh.slice_type != HEVC_SLICE_I) {
+            ret = ff_hevc_slice_rpl(s);
+            if (ret < 0) {
+                av_log(s->avctx, AV_LOG_WARNING,
+                       "Error constructing the reference lists for the current slice.\n");
+                goto fail;
+            }
+        }
+
+        if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
+            ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0);
+            if (ret < 0)
+                goto fail;
+        }
+
+        if (s->avctx->hwaccel) {
+            ret = FF_HW_CALL(s->avctx, decode_slice, nal->raw_data, nal->raw_size);
+            if (ret < 0)
+                goto fail;
+        } else {
+            if (s->avctx->profile == AV_PROFILE_HEVC_SCC) {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "SCC profile is not yet implemented in hevc native decoder.\n");
+                ret = AVERROR_PATCHWELCOME;
+                goto fail;
+            }
+
+            /* slices with entry points are decoded row-parallel when more
+             * than one thread is available */
+            if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
+                ctb_addr_ts = hls_slice_data_wpp(s, nal);
+            else
+                ctb_addr_ts = hls_decode_entry(s, &gb);
+            /* the whole picture has been decoded once the returned address
+             * reaches the number of CTBs in the picture */
+            if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
+                ret = hevc_frame_end(s);
+                if (ret < 0)
+                    goto fail;
+                s->is_decoded = 1;
+            }
+
+            if (ctb_addr_ts < 0) {
+                ret = ctb_addr_ts;
+                goto fail;
+            }
+        }
+        break;
+    case HEVC_NAL_EOS_NUT:
+    case HEVC_NAL_EOB_NUT:
+        /* end of sequence/bitstream: advance the sequence counter and wait
+         * for the next random access point */
+        s->seq_decode = (s->seq_decode + 1) & HEVC_SEQUENCE_COUNTER_MASK;
+        s->max_ra     = INT_MAX;
+        break;
+    case HEVC_NAL_AUD:
+    case HEVC_NAL_FD_NUT:
+    case HEVC_NAL_UNSPEC62:
+        break;
+    default:
+        av_log(s->avctx, AV_LOG_INFO,
+               "Skipping NAL unit %d\n", s->nal_unit_type);
+    }
+
+    return 0;
+fail:
+    /* errors are only fatal when the caller asked for AV_EF_EXPLODE */
+    if (s->avctx->err_recognition & AV_EF_EXPLODE)
+        return ret;
+    return 0;
+}
+
+/**
+ * Split a packet into NAL units and decode them in order.
+ *
+ * Before decoding, the packet is scanned for end-of-sequence/bitstream NAL
+ * units and for a trailing Dolby Vision RPU.
+ *
+ * @param s      decoder context
+ * @param buf    packet data
+ * @param length size of the packet data in bytes
+ * @return the result of the last decoding step (non-negative on success),
+ *         or a negative AVERROR code on failure
+ */
+static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
+{
+    int only_eos_so_far = 1;
+    int ret = 0;
+
+    s->collocated_ref = NULL;
+    s->cur_frame      = NULL;
+    s->last_eos       = s->eos;
+    s->eos            = 0;
+    s->overlap        = 0;
+
+    /* split the input packet into NAL units, so we know the upper bound on the
+     * number of slices in the frame */
+    ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
+                                s->nal_length_size, s->avctx->codec_id, 1, 0);
+    if (ret < 0) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Error splitting the input into NAL units.\n");
+        return ret;
+    }
+
+    /* EOS/EOB NAL units that open the packet set last_eos, the ones found
+     * after any other NAL unit set eos */
+    for (int n = 0; n < s->pkt.nb_nals; n++) {
+        const int type = s->pkt.nals[n].type;
+
+        if (type != HEVC_NAL_EOB_NUT && type != HEVC_NAL_EOS_NUT) {
+            only_eos_so_far = 0;
+            continue;
+        }
+
+        if (only_eos_so_far)
+            s->last_eos = 1;
+        else
+            s->eos = 1;
+    }
+
+    /*
+     * Check for RPU delimiter.
+     *
+     * Dolby Vision RPUs masquerade as unregistered NALs of type 62.
+     *
+     * We have to do this check here and create the rpu buffer, since RPUs are appended
+     * to the end of an AU; they are the last non-EOB/EOS NAL in the AU.
+     */
+    if (s->pkt.nb_nals > 1) {
+        H2645NAL *last = &s->pkt.nals[s->pkt.nb_nals - 1];
+
+        if (last->type == HEVC_NAL_UNSPEC62 && last->size > 2 &&
+            !last->nuh_layer_id && !last->temporal_id) {
+            if (s->rpu_buf) {
+                av_buffer_unref(&s->rpu_buf);
+                av_log(s->avctx, AV_LOG_WARNING, "Multiple Dolby Vision RPUs found in one AU. Skipping previous.\n");
+            }
+
+            /* keep a copy of the raw RPU without its first two bytes */
+            s->rpu_buf = av_buffer_alloc(last->raw_size - 2);
+            if (!s->rpu_buf)
+                return AVERROR(ENOMEM);
+            memcpy(s->rpu_buf->data, last->raw_data + 2, last->raw_size - 2);
+
+            ret = ff_dovi_rpu_parse(&s->dovi_ctx, last->data + 2, last->size - 2,
+                                    s->avctx->err_recognition);
+            if (ret < 0) {
+                av_buffer_unref(&s->rpu_buf);
+                av_log(s->avctx, AV_LOG_WARNING, "Error parsing DOVI NAL unit.\n");
+                /* ignore */
+            }
+        }
+    }
+
+    /* decode the NAL units */
+    for (int n = 0; n < s->pkt.nb_nals; n++) {
+        H2645NAL *nal = &s->pkt.nals[n];
+
+        if (s->avctx->skip_frame >= AVDISCARD_ALL)
+            continue;
+        if (s->avctx->skip_frame >= AVDISCARD_NONREF &&
+            ff_hevc_nal_is_nonref(nal->type))
+            continue;
+        if (nal->nuh_layer_id > 0)
+            continue;
+
+        ret = decode_nal_unit(s, nal);
+        if (ret >= 0 && s->overlap > 2)
+            ret = AVERROR_INVALIDDATA;
+        if (ret < 0) {
+            av_log(s->avctx, AV_LOG_WARNING,
+                   "Error parsing NAL unit #%d.\n", n);
+            break;
+        }
+    }
+
+    /* with frame threading, mark the current frame as completely done
+     * whether or not decoding succeeded */
+    if (s->cur_frame && s->threads_type == FF_THREAD_FRAME)
+        ff_progress_frame_report(&s->cur_frame->tf, INT_MAX);
+
+    return ret;
+}
+
+/**
+ * Compare the MD5 of every plane of a decoded frame against the checksums
+ * stored in s->sei.picture_hash and log the outcome.
+ *
+ * @param s     decoder context
+ * @param frame frame to verify
+ * @return 0 when all planes match, AVERROR_INVALIDDATA on a mismatch,
+ *         AVERROR(EINVAL) for an unknown pixel format, AVERROR(ENOMEM) when
+ *         the byteswap buffer cannot be allocated
+ */
+static int verify_md5(HEVCContext *s, AVFrame *frame)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+    char msg_buf[4 * (50 + 2 * 2 * 16 /* MD5-size */)];
+    int status = 0;
+    int pixel_shift;
+    int coded_w, coded_h;
+
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    /* 1 when a sample occupies two bytes, 0 otherwise */
+    pixel_shift = desc->comp[0].depth > 8;
+
+    /* the checksums are LE, so we have to byteswap for >8bpp formats
+     * on BE arches */
+#if HAVE_BIGENDIAN
+    if (pixel_shift && !s->checksum_buf) {
+        av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
+                       FFMAX3(frame->linesize[0], frame->linesize[1],
+                              frame->linesize[2]));
+        if (!s->checksum_buf)
+            return AVERROR(ENOMEM);
+    }
+#endif
+
+    coded_w = s->avctx->coded_width;
+    coded_h = s->avctx->coded_height;
+
+    msg_buf[0] = '\0';
+    for (int plane = 0; frame->data[plane]; plane++) {
+        const int is_chroma = plane == 1 || plane == 2;
+        const int w = is_chroma ? (coded_w >> desc->log2_chroma_w) : coded_w;
+        const int h = is_chroma ? (coded_h >> desc->log2_chroma_h) : coded_h;
+        uint8_t md5[16];
+
+        /* hash the plane line by line, leaving out the line padding */
+        av_md5_init(s->md5_ctx);
+        for (int y = 0; y < h; y++) {
+            const uint8_t *line = frame->data[plane] + y * frame->linesize[plane];
+#if HAVE_BIGENDIAN
+            if (pixel_shift) {
+                s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
+                                    (const uint16_t *) line, w);
+                line = s->checksum_buf;
+            }
+#endif
+            av_md5_update(s->md5_ctx, line, w << pixel_shift);
+        }
+        av_md5_final(s->md5_ctx, md5);
+
+#define MD5_PRI "%016" PRIx64 "%016" PRIx64
+#define MD5_PRI_ARG(buf) AV_RB64(buf), AV_RB64((const uint8_t*)(buf) + 8)
+
+        if (memcmp(md5, s->sei.picture_hash.md5[plane], 16)) {
+            av_strlcatf(msg_buf, sizeof(msg_buf),
+                       "mismatching checksum of plane %d - " MD5_PRI " != " MD5_PRI "; ",
+                        plane, MD5_PRI_ARG(md5), MD5_PRI_ARG(s->sei.picture_hash.md5[plane]));
+            status = AVERROR_INVALIDDATA;
+        } else {
+            av_strlcatf(msg_buf, sizeof(msg_buf),
+                        "plane %d - correct " MD5_PRI "; ",
+                        plane, MD5_PRI_ARG(md5));
+        }
+    }
+
+    av_log(s->avctx, status < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG,
+           "Verifying checksum for frame with POC %d: %s\n",
+           s->poc, msg_buf);
+
+    return status;
+}
+
+/**
+ * Parse extradata and export the stream parameters found in it.
+ *
+ * @param s      decoder context
+ * @param buf    extradata buffer
+ * @param length size of the extradata in bytes
+ * @param first  when non-zero, the parameters of the first SPS present in
+ *               the SPS list are exported as well
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
+{
+    int err = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
+                                       &s->nal_length_size, s->avctx->err_recognition,
+                                       s->apply_defdispwin, s->avctx);
+    if (err < 0)
+        return err;
+
+    /* export stream parameters from the first SPS */
+    if (first) {
+        for (int idx = 0; idx < FF_ARRAY_ELEMS(s->ps.sps_list); idx++) {
+            if (!s->ps.sps_list[idx])
+                continue;
+
+            export_stream_params(s, s->ps.sps_list[idx]);
+            break;
+        }
+    }
+
+    /* export stream parameters from SEI */
+    err = export_stream_params_from_sei(s);
+
+    return err < 0 ? err : 0;
+}
+
+/**
+ * Decode one packet.
+ *
+ * An empty packet outputs the next buffered frame. Otherwise new extradata
+ * and Dolby Vision configuration carried as packet side data are applied,
+ * the NAL units are decoded, the frame is finished (hwaccel end_frame or
+ * MD5 verification) and a pending output frame is handed to the caller.
+ *
+ * @param avctx      codec context
+ * @param rframe     frame receiving the output picture
+ * @param got_output set to non-zero when rframe was filled
+ * @param avpkt      input packet
+ * @return avpkt->size on success (0 for an empty packet), a negative
+ *         AVERROR code on failure
+ */
+static int hevc_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
+                             int *got_output, AVPacket *avpkt)
+{
+    HEVCContext *s = avctx->priv_data;
+    uint8_t *side;
+    size_t side_size;
+    int ret;
+
+    /* empty packet: output the next buffered frame, if any */
+    if (!avpkt->size) {
+        ret = ff_hevc_output_frame(s, rframe, 1);
+        if (ret < 0)
+            return ret;
+
+        *got_output = ret;
+        return 0;
+    }
+
+    side = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
+    if (side && side_size > 0) {
+        ret = hevc_decode_extradata(s, side, side_size, 0);
+        if (ret < 0)
+            return ret;
+    }
+
+    side = av_packet_get_side_data(avpkt, AV_PKT_DATA_DOVI_CONF, &side_size);
+    if (side && side_size >= sizeof(s->dovi_ctx.cfg)) {
+        const int prev_profile = s->dovi_ctx.cfg.dv_profile;
+
+        s->dovi_ctx.cfg = *(AVDOVIDecoderConfigurationRecord *) side;
+        /* only a change of an already known configuration is logged */
+        if (prev_profile)
+            av_log(avctx, AV_LOG_DEBUG,
+                   "New DOVI configuration record from input packet (profile %d -> %u).\n",
+                   prev_profile, s->dovi_ctx.cfg.dv_profile);
+    }
+
+    s->collocated_ref = NULL;
+    s->cur_frame      = NULL;
+
+    ret = decode_nal_units(s, avpkt->data, avpkt->size);
+    if (ret < 0)
+        return ret;
+
+    if (avctx->hwaccel) {
+        if (s->cur_frame) {
+            ret = FF_HW_SIMPLE_CALL(avctx, end_frame);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "hardware accelerator failed to decode picture\n");
+                ff_hevc_unref_frame(s->cur_frame, ~0);
+                return ret;
+            }
+        }
+    } else if ((avctx->err_recognition & AV_EF_CRCCHECK) && s->cur_frame &&
+               s->is_decoded && s->sei.picture_hash.is_md5) {
+        /* verify the SEI checksum */
+        ret = verify_md5(s, s->cur_frame->f);
+        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE)) {
+            ff_hevc_unref_frame(s->cur_frame, ~0);
+            return ret;
+        }
+    }
+    s->sei.picture_hash.is_md5 = 0;
+
+    if (s->is_decoded) {
+        av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
+        s->is_decoded = 0;
+    }
+
+    /* hand over the frame selected for output, if there is one */
+    if (s->output_frame->buf[0]) {
+        av_frame_move_ref(rframe, s->output_frame);
+        *got_output = 1;
+    }
+
+    return avpkt->size;
+}
+
+/**
+ * Make dst reference the same picture as src: the progress frame, the film
+ * grain frame (when src uses one), the motion/reference list tables and
+ * the hwaccel private data, plus the scalar per-frame fields.
+ *
+ * @param dst destination frame
+ * @param src source frame
+ * @return 0 on success, a negative AVERROR code when the film grain frame
+ *         cannot be referenced (dst is unreferenced again in that case)
+ */
+static int hevc_ref_frame(HEVCFrame *dst, HEVCFrame *src)
+{
+    ff_progress_frame_ref(&dst->tf, &src->tf);
+
+    if (src->needs_fg) {
+        int err = av_frame_ref(dst->frame_grain, src->frame_grain);
+
+        if (err < 0) {
+            ff_hevc_unref_frame(dst, ~0);
+            return err;
+        }
+        dst->needs_fg = 1;
+    }
+
+    /* shared, reference-counted tables */
+    dst->tab_mvf      = ff_refstruct_ref(src->tab_mvf);
+    dst->rpl_tab      = ff_refstruct_ref(src->rpl_tab);
+    dst->rpl          = ff_refstruct_ref(src->rpl);
+    dst->nb_rpl_elems = src->nb_rpl_elems;
+
+    /* plain per-frame fields */
+    dst->poc       = src->poc;
+    dst->ctb_count = src->ctb_count;
+    dst->flags     = src->flags;
+    dst->sequence  = src->sequence;
+
+    ff_refstruct_replace(&dst->hwaccel_picture_private,
+                          src->hwaccel_picture_private);
+
+    return 0;
+}
+
+/**
+ * Release everything owned by the decoder context.
+ *
+ * @param avctx codec context
+ * @return always 0
+ */
+static av_cold int hevc_decode_free(AVCodecContext *avctx)
+{
+    HEVCContext *s = avctx->priv_data;
+
+    pic_arrays_free(s);
+
+    /* Dolby Vision state */
+    ff_dovi_ctx_unref(&s->dovi_ctx);
+    av_buffer_unref(&s->rpu_buf);
+
+    av_freep(&s->md5_ctx);
+
+    /* SAO line buffers, one pair per plane */
+    for (int plane = 0; plane < 3; plane++) {
+        av_freep(&s->sao_pixel_buffer_h[plane]);
+        av_freep(&s->sao_pixel_buffer_v[plane]);
+    }
+    av_frame_free(&s->output_frame);
+
+    /* decoded picture buffer */
+    for (int n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
+        HEVCFrame *frame = &s->DPB[n];
+
+        ff_hevc_unref_frame(frame, ~0);
+        av_frame_free(&frame->frame_grain);
+    }
+
+    ff_hevc_ps_uninit(&s->ps);
+
+    /* slice header tables */
+    av_freep(&s->sh.entry_point_offset);
+    av_freep(&s->sh.offset);
+    av_freep(&s->sh.size);
+
+    av_freep(&s->local_ctx);
+
+    ff_h2645_packet_uninit(&s->pkt);
+
+    ff_hevc_reset_sei(&s->sei);
+
+    return 0;
+}
+
+/**
+ * Allocate and initialise the parts of the decoder context that do not
+ * depend on the stream: the first local context, the output frame, the
+ * film grain frames of the DPB and the MD5 context.
+ *
+ * Nothing is released here when an allocation fails.
+ *
+ * @param avctx codec context
+ * @return 0 on success, AVERROR(ENOMEM) when an allocation fails
+ */
+static av_cold int hevc_init_context(AVCodecContext *avctx)
+{
+    HEVCContext *s = avctx->priv_data;
+    HEVCLocalContext *lc;
+
+    s->avctx = avctx;
+
+    /* a single local context to start with */
+    s->local_ctx = av_mallocz(sizeof(*s->local_ctx));
+    if (!s->local_ctx)
+        return AVERROR(ENOMEM);
+    s->nb_local_ctx = 1;
+
+    lc                     = &s->local_ctx[0];
+    lc->parent             = s;
+    lc->logctx             = avctx;
+    lc->common_cabac_state = &s->cabac;
+
+    s->output_frame = av_frame_alloc();
+    if (!s->output_frame)
+        return AVERROR(ENOMEM);
+
+    /* every DPB entry gets its own frame for film grain output */
+    for (int n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
+        HEVCFrame *frame = &s->DPB[n];
+
+        frame->frame_grain = av_frame_alloc();
+        if (!frame->frame_grain)
+            return AVERROR(ENOMEM);
+    }
+
+    s->max_ra = INT_MAX;
+
+    s->md5_ctx = av_md5_alloc();
+    if (!s->md5_ctx)
+        return AVERROR(ENOMEM);
+
+    ff_bswapdsp_init(&s->bdsp);
+
+    s->dovi_ctx.logctx = avctx;
+    s->eos             = 0;
+
+    ff_hevc_reset_sei(&s->sei);
+
+    return 0;
+}
+
+#if HAVE_THREADS
+/**
+ * Bring the HEVCContext of dst in line with the one of src: the DPB and the
+ * parameter set lists are re-referenced, the sequence/output state is
+ * copied and the SEI-derived state is replaced.
+ *
+ * @param dst codec context to update
+ * @param src codec context to take the state from
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int hevc_update_thread_context(AVCodecContext *dst,
+                                      const AVCodecContext *src)
+{
+    HEVCContext *s  = dst->priv_data;
+    HEVCContext *s0 = src->priv_data;
+    int i, ret;
+
+    /* drop whatever dst holds in each DPB slot, then reference the frame
+     * src has in that slot, if any */
+    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+        ff_hevc_unref_frame(&s->DPB[i], ~0);
+        if (s0->DPB[i].f) {
+            ret = hevc_ref_frame(&s->DPB[i], &s0->DPB[i]);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    /* forget the active SPS before the lists are replaced when it differs
+     * from the one of src; it is activated again through set_sps() below.
+     * NOTE(review): presumably s->ps.sps points into sps_list and would
+     * dangle otherwise - confirm */
+    if (s->ps.sps != s0->ps.sps)
+        s->ps.sps = NULL;
+    for (int i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
+        ff_refstruct_replace(&s->ps.vps_list[i], s0->ps.vps_list[i]);
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
+        ff_refstruct_replace(&s->ps.sps_list[i], s0->ps.sps_list[i]);
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
+        ff_refstruct_replace(&s->ps.pps_list[i], s0->ps.pps_list[i]);
+
+    if (s->ps.sps != s0->ps.sps)
+        if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
+            return ret;
+
+    /* sequence and output state */
+    s->seq_decode = s0->seq_decode;
+    s->seq_output = s0->seq_output;
+    s->pocTid0    = s0->pocTid0;
+    s->max_ra     = s0->max_ra;
+    s->eos        = s0->eos;
+    s->no_rasl_output_flag = s0->no_rasl_output_flag;
+
+    s->is_nalff        = s0->is_nalff;
+    s->nal_length_size = s0->nal_length_size;
+
+    s->threads_number      = s0->threads_number;
+    s->threads_type        = s0->threads_type;
+
+    s->film_grain_warning_shown = s0->film_grain_warning_shown;
+
+    /* src ended a sequence: advance the sequence counter and reset max_ra,
+     * the same way decode_nal_unit() does for EOS/EOB NAL units */
+    if (s0->eos) {
+        s->seq_decode = (s->seq_decode + 1) & HEVC_SEQUENCE_COUNTER_MASK;
+        s->max_ra = INT_MAX;
+    }
+
+    /* SEI state, Dolby Vision RPU and context */
+    ret = ff_h2645_sei_ctx_replace(&s->sei.common, &s0->sei.common);
+    if (ret < 0)
+        return ret;
+
+    ret = av_buffer_replace(&s->sei.common.dynamic_hdr_plus.info,
+                            s0->sei.common.dynamic_hdr_plus.info);
+    if (ret < 0)
+        return ret;
+
+    ret = av_buffer_replace(&s->rpu_buf, s0->rpu_buf);
+    if (ret < 0)
+        return ret;
+
+    ff_dovi_ctx_replace(&s->dovi_ctx, &s0->dovi_ctx);
+
+    ret = av_buffer_replace(&s->sei.common.dynamic_hdr_vivid.info,
+                            s0->sei.common.dynamic_hdr_vivid.info);
+    if (ret < 0)
+        return ret;
+
+    /* the remaining SEI payloads are copied by value */
+    s->sei.common.frame_packing        = s0->sei.common.frame_packing;
+    s->sei.common.display_orientation  = s0->sei.common.display_orientation;
+    s->sei.common.alternative_transfer = s0->sei.common.alternative_transfer;
+    s->sei.common.mastering_display    = s0->sei.common.mastering_display;
+    s->sei.common.content_light        = s0->sei.common.content_light;
+    s->sei.common.aom_film_grain       = s0->sei.common.aom_film_grain;
+
+    ret = export_stream_params_from_sei(s);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+#endif
+
+static av_cold int hevc_decode_init(AVCodecContext *avctx)
+{
+    HEVCContext *const hevc = avctx->priv_data;
+    const AVPacketSideData *dovi_sd;
+    int err;
+    /* .init callback: choose the threading setup, build the context, then read extradata.
+     * With slice threading every thread is counted, otherwise a single one is used. */
+    if (!(avctx->active_thread_type & FF_THREAD_SLICE)) {
+        hevc->threads_number = 1;
+    } else {
+        hevc->threads_number = avctx->thread_count;
+        err = ff_slice_thread_init_progress(avctx);
+        if (err < 0)
+            return err;
+    }
+
+    /* frame threading is only selected when more than one thread is in use */
+    hevc->threads_type = ((avctx->active_thread_type & FF_THREAD_FRAME) &&
+                          avctx->thread_count > 1) ? FF_THREAD_FRAME : FF_THREAD_SLICE;
+
+    err = hevc_init_context(avctx);
+    if (err < 0)
+        return err;
+
+    hevc->enable_parallel_tiles             = 0;
+    hevc->sei.picture_timing.picture_struct = 0;
+    hevc->eos                               = 1;
+
+    atomic_init(&hevc->wpp_err, 0);
+
+    /* context copies skip the extradata and side-data handling below */
+    if (avctx->internal->is_copy)
+        return 0;
+
+    if (avctx->extradata && avctx->extradata_size > 0) {
+        err = hevc_decode_extradata(hevc, avctx->extradata, avctx->extradata_size, 1);
+        if (err >= 0)
+            err = ff_h2645_sei_to_context(avctx, &hevc->sei.common);
+        if (err < 0)
+            return err;
+    }
+
+    /* adopt an attached Dolby Vision configuration record when it is large enough */
+    dovi_sd = ff_get_coded_side_data(avctx, AV_PKT_DATA_DOVI_CONF);
+    if (dovi_sd && dovi_sd->size >= sizeof(hevc->dovi_ctx.cfg))
+        hevc->dovi_ctx.cfg = *(AVDOVIDecoderConfigurationRecord *) dovi_sd->data;
+
+    return 0;
+}
+
+static void hevc_decode_flush(AVCodecContext *avctx)
+{   /* .flush callback of ff_hevc_decoder: empties the DPB and resets SEI / Dolby Vision state */
+    HEVCContext *s = avctx->priv_data;
+    ff_hevc_flush_dpb(s);               /* "Drop all frames currently in DPB" (hevcdec.h) */
+    ff_hevc_reset_sei(&s->sei);
+    ff_dovi_ctx_flush(&s->dovi_ctx);
+    av_buffer_unref(&s->rpu_buf);
+    s->max_ra = INT_MAX;                /* same value hevc_init_context() starts with */
+    s->eos = 1;                         /* same value hevc_decode_init() starts with */
+    /* NOTE(review): FF_HW_* presumably address the active hwaccel's flush hook, called only when present - confirm */
+    if (FF_HW_HAS_CB(avctx, flush))
+        FF_HW_SIMPLE_CALL(avctx, flush);
+}
+
+#define OFFSET(x) offsetof(HEVCContext, x)
+#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+
+static const AVOption options[] = { /* private decoder options; both entries store into HEVCContext.apply_defdispwin */
+    { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
+        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
+    { "strict-displaywin", "strictly apply default display window size", OFFSET(apply_defdispwin),
+        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR }, /* second name for the same field */
+    { NULL },
+};
+
+static const AVClass hevc_decoder_class = { /* installed as .p.priv_class of ff_hevc_decoder */
+    .class_name = "HEVC decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,               /* the AVOption table defined in this file */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const FFCodec ff_hevc_decoder = { /* codec registration: wires the callbacks of this file to AV_CODEC_ID_HEVC */
+    .p.name                = "hevc",
+    CODEC_LONG_NAME("HEVC (High Efficiency Video Coding)"),
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_HEVC,
+    .priv_data_size        = sizeof(HEVCContext),
+    .p.priv_class          = &hevc_decoder_class,
+    .init                  = hevc_decode_init,
+    .close                 = hevc_decode_free,
+    FF_CODEC_DECODE_CB(hevc_decode_frame),
+    .flush                 = hevc_decode_flush,
+    UPDATE_THREAD_CONTEXT(hevc_update_thread_context),
+    .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
+                             AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
+    .caps_internal         = FF_CODEC_CAP_EXPORTS_CROPPING |
+                             FF_CODEC_CAP_USES_PROGRESSFRAMES |
+                             FF_CODEC_CAP_INIT_CLEANUP, /* NOTE(review): presumably makes .close run after a failed .init - confirm */
+    .p.profiles            = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
+    .hw_configs            = (const AVCodecHWConfigInternal *const []) { /* one entry per hwaccel enabled in the build config */
+#if CONFIG_HEVC_DXVA2_HWACCEL
+                               HWACCEL_DXVA2(hevc),
+#endif
+#if CONFIG_HEVC_D3D11VA_HWACCEL
+                               HWACCEL_D3D11VA(hevc),
+#endif
+#if CONFIG_HEVC_D3D11VA2_HWACCEL
+                               HWACCEL_D3D11VA2(hevc),
+#endif
+#if CONFIG_HEVC_D3D12VA_HWACCEL
+                               HWACCEL_D3D12VA(hevc),
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+                               HWACCEL_NVDEC(hevc),
+#endif
+#if CONFIG_HEVC_VAAPI_HWACCEL
+                               HWACCEL_VAAPI(hevc),
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+                               HWACCEL_VDPAU(hevc),
+#endif
+#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+                               HWACCEL_VIDEOTOOLBOX(hevc),
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+                               HWACCEL_VULKAN(hevc),
+#endif
+                               NULL /* terminates the list */
+                           },
+};
diff --git a/libavcodec/hevc/hevcdec.h b/libavcodec/hevc/hevcdec.h
new file mode 100644
index 0000000000..8208268460
--- /dev/null
+++ b/libavcodec/hevc/hevcdec.h
@@ -0,0 +1,679 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVC_HEVCDEC_H
+#define AVCODEC_HEVC_HEVCDEC_H
+
+#include <stdatomic.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/mem_internal.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/bswapdsp.h"
+#include "libavcodec/cabac.h"
+#include "libavcodec/dovi_rpu.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/h2645_parse.h"
+#include "libavcodec/h274.h"
+#include "libavcodec/progressframe.h"
+#include "libavcodec/videodsp.h"
+
+#include "dsp.h"
+#include "hevc.h"
+#include "pred.h"
+#include "ps.h"
+#include "sei.h"
+
+#define SHIFT_CTB_WPP 2
+
+#define MAX_TB_SIZE 32
+#define MAX_QP 51
+#define DEFAULT_INTRA_TC_OFFSET 2
+
+#define HEVC_CONTEXTS 199
+#define HEVC_STAT_COEFFS 4
+
+#define MRG_MAX_NUM_CANDS     5
+
+#define L0 0
+#define L1 1
+
+#define EPEL_EXTRA_BEFORE 1
+#define EPEL_EXTRA_AFTER  2
+#define EPEL_EXTRA        3
+#define QPEL_EXTRA_BEFORE 3
+#define QPEL_EXTRA_AFTER  4
+#define QPEL_EXTRA        7
+
+#define EDGE_EMU_BUFFER_STRIDE 80
+
+/** Value of the luma sample at position (x, y) in the 2D array tab (rows stored one after another).
+ * The row stride comes from the expansion site: `s->sps->width` for SAMPLE, `min_cb_width` for SAMPLE_CTB.
+ * NOTE(review): HEVCContext in this header reaches the SPS via `ps.sps`, not `sps` - confirm SAMPLE is still usable. */
+#define SAMPLE(tab, x, y) ((tab)[(y) * s->sps->width + (x)])
+#define SAMPLE_CTB(tab, x, y) ((tab)[(y) * min_cb_width + (x)])
+/* NAL unit type predicates; `s` is a pointer to any struct with a nal_unit_type member (HEVCContext has one). */
+#define IS_IDR(s) ((s)->nal_unit_type == HEVC_NAL_IDR_W_RADL || (s)->nal_unit_type == HEVC_NAL_IDR_N_LP)
+#define IS_BLA(s) ((s)->nal_unit_type == HEVC_NAL_BLA_W_RADL || (s)->nal_unit_type == HEVC_NAL_BLA_W_LP || \
+                   (s)->nal_unit_type == HEVC_NAL_BLA_N_LP)
+#define IS_IRAP(s) ((s)->nal_unit_type >= HEVC_NAL_BLA_W_LP && (s)->nal_unit_type <= HEVC_NAL_RSV_IRAP_VCL23)
+
+enum RPSType { /* NOTE(review): presumably the index into HEVCContext.rps[] - confirm against refs.c */
+    ST_CURR_BEF = 0,
+    ST_CURR_AFT,
+    ST_FOLL,
+    LT_CURR,
+    LT_FOLL,
+    NB_RPS_TYPE, /* number of types above (5); HEVCContext.rps[] has 5 elements */
+};
+
+enum PartMode { /* type of CodingUnit.part_mode; values are fixed explicitly (0..7) */
+    PART_2Nx2N = 0,
+    PART_2NxN  = 1,
+    PART_Nx2N  = 2,
+    PART_NxN   = 3,
+    PART_2NxnU = 4,
+    PART_2NxnD = 5,
+    PART_nLx2N = 6,
+    PART_nRx2N = 7,
+};
+
+enum PredMode { /* type of CodingUnit.pred_mode */
+    MODE_INTER = 0,
+    MODE_INTRA,
+    MODE_SKIP,
+};
+
+enum InterPredIdc { /* numbering starts at PRED_L0 == 0, i.e. it differs from enum PredFlag (PF_L0 == 1) */
+    PRED_L0 = 0,
+    PRED_L1,
+    PRED_BI,
+};
+
+enum PredFlag { /* NOTE(review): presumably the values stored in MvField.pred_flag - confirm */
+    PF_INTRA = 0,
+    PF_L0,
+    PF_L1,
+    PF_BI, /* 3, numerically equal to PF_L0 | PF_L1 */
+};
+
+enum IntraPredMode { /* 35 modes with values 0..34; for the angular modes the name suffix equals the value */
+    INTRA_PLANAR = 0,
+    INTRA_DC,
+    INTRA_ANGULAR_2,
+    INTRA_ANGULAR_3,
+    INTRA_ANGULAR_4,
+    INTRA_ANGULAR_5,
+    INTRA_ANGULAR_6,
+    INTRA_ANGULAR_7,
+    INTRA_ANGULAR_8,
+    INTRA_ANGULAR_9,
+    INTRA_ANGULAR_10,
+    INTRA_ANGULAR_11,
+    INTRA_ANGULAR_12,
+    INTRA_ANGULAR_13,
+    INTRA_ANGULAR_14,
+    INTRA_ANGULAR_15,
+    INTRA_ANGULAR_16,
+    INTRA_ANGULAR_17,
+    INTRA_ANGULAR_18,
+    INTRA_ANGULAR_19,
+    INTRA_ANGULAR_20,
+    INTRA_ANGULAR_21,
+    INTRA_ANGULAR_22,
+    INTRA_ANGULAR_23,
+    INTRA_ANGULAR_24,
+    INTRA_ANGULAR_25,
+    INTRA_ANGULAR_26,
+    INTRA_ANGULAR_27,
+    INTRA_ANGULAR_28,
+    INTRA_ANGULAR_29,
+    INTRA_ANGULAR_30,
+    INTRA_ANGULAR_31,
+    INTRA_ANGULAR_32,
+    INTRA_ANGULAR_33,
+    INTRA_ANGULAR_34,
+};
+
+enum SAOType { /* sample adaptive offset type; NOTE(review): SAO_APPLIED (3) is not a coded type as far as this header shows - confirm its use */
+    SAO_NOT_APPLIED = 0,
+    SAO_BAND,
+    SAO_EDGE,
+    SAO_APPLIED
+};
+
+enum SAOEOClass { /* four classes, values 0..3; NOTE(review): presumably the edge-offset direction - confirm */
+    SAO_EO_HORIZ = 0,
+    SAO_EO_VERT,
+    SAO_EO_135D,
+    SAO_EO_45D,
+};
+
+enum ScanType { /* type of the scan_idx argument of ff_hevc_hls_residual_coding() */
+    SCAN_DIAG = 0,
+    SCAN_HORIZ,
+    SCAN_VERT,
+};
+
+typedef struct HEVCCABACState { /* same two arrays as HEVCLocalContext.cabac_state / .stat_coeff */
+    uint8_t state[HEVC_CONTEXTS];          ///< HEVC_CONTEXTS (199) bytes
+    uint8_t stat_coeff[HEVC_STAT_COEFFS];  ///< HEVC_STAT_COEFFS (4) bytes
+} HEVCCABACState;
+
+typedef struct LongTermRPS { /* type of SliceHeader.long_term_rps; three parallel arrays of 32 entries */
+    int     poc[32];
+    uint8_t poc_msb_present[32];
+    uint8_t used[32];
+    uint8_t nb_refs; ///< NOTE(review): presumably the number of valid entries in the arrays above - confirm
+} LongTermRPS;
+
+typedef struct RefPicList { /* three parallel arrays of HEVC_MAX_REFS entries plus a count */
+    struct HEVCFrame *ref[HEVC_MAX_REFS];
+    int list[HEVC_MAX_REFS];
+    int isLongTerm[HEVC_MAX_REFS];
+    int nb_refs; ///< NOTE(review): presumably the number of valid entries in the arrays above - confirm
+} RefPicList;
+
+typedef struct RefPicListTab {
+    RefPicList refPicList[2]; ///< two lists; NOTE(review): presumably indexed with L0 (0) / L1 (1) - confirm
+} RefPicListTab;
+
+typedef struct SliceHeader { /* slice header values; HEVCContext keeps one instance in its `sh` member */
+    unsigned int pps_id;
+
+    /// address (in raster order) of the first block in the current slice segment
+    unsigned int   slice_segment_addr;
+    /// address (in raster order) of the first block in the current slice
+    unsigned int   slice_addr;
+
+    enum HEVCSliceType slice_type;
+
+    int pic_order_cnt_lsb;
+
+    uint8_t first_slice_in_pic_flag;
+    uint8_t dependent_slice_segment_flag;
+    uint8_t pic_output_flag;
+    uint8_t colour_plane_id;
+
+    /// short-term RPS; one coded in the slice header itself is stored in slice_rps
+    int short_term_ref_pic_set_sps_flag;
+    int short_term_ref_pic_set_size;
+    ShortTermRPS slice_rps;
+    const ShortTermRPS *short_term_rps;
+    int long_term_ref_pic_set_size;
+    LongTermRPS long_term_rps;
+    unsigned int list_entry_lx[2][32];
+
+    uint8_t rpl_modification_flag[2];
+    uint8_t no_output_of_prior_pics_flag;
+    uint8_t slice_temporal_mvp_enabled_flag;
+
+    unsigned int nb_refs[2];
+
+    uint8_t slice_sample_adaptive_offset_flag[3];
+    uint8_t mvd_l1_zero_flag;
+
+    uint8_t cabac_init_flag;
+    uint8_t disable_deblocking_filter_flag; ///< slice_header_disable_deblocking_filter_flag
+    uint8_t slice_loop_filter_across_slices_enabled_flag;
+    uint8_t collocated_list;
+
+    unsigned int collocated_ref_idx;
+
+    int slice_qp_delta;
+    int slice_cb_qp_offset;
+    int slice_cr_qp_offset;
+
+    int slice_act_y_qp_offset;
+    int slice_act_cb_qp_offset;
+    int slice_act_cr_qp_offset;
+
+    uint8_t cu_chroma_qp_offset_enabled_flag;
+
+    int beta_offset;    ///< beta_offset_div2 * 2
+    int tc_offset;      ///< tc_offset_div2 * 2
+
+    uint8_t max_num_merge_cand; ///< 5 - 5_minus_max_num_merge_cand
+    uint8_t use_integer_mv_flag;
+
+    unsigned *entry_point_offset;
+    int * offset;
+    int * size;         ///< heap array; hevcdec.c releases it with av_freep(&s->sh.size)
+    int num_entry_point_offsets;
+
+    int8_t slice_qp;
+
+    uint8_t luma_log2_weight_denom;
+    int16_t chroma_log2_weight_denom;
+
+    int16_t luma_weight_l0[16];
+    int16_t chroma_weight_l0[16][2];
+    int16_t chroma_weight_l1[16][2];
+    int16_t luma_weight_l1[16];
+
+    int16_t luma_offset_l0[16];
+    int16_t chroma_offset_l0[16][2];
+
+    int16_t luma_offset_l1[16];
+    int16_t chroma_offset_l1[16][2];
+
+    int slice_ctb_addr_rs;
+    unsigned data_offset;
+} SliceHeader;
+
+typedef struct CodingUnit { /* per-CU values; HEVCLocalContext keeps one instance in its `cu` member */
+    int x;
+    int y;
+
+    enum PredMode pred_mode;    ///< PredMode
+    enum PartMode part_mode;    ///< PartMode
+
+    // Inferred parameters
+    uint8_t intra_split_flag;   ///< IntraSplitFlag
+    uint8_t max_trafo_depth;    ///< MaxTrafoDepth
+    uint8_t cu_transquant_bypass_flag;
+} CodingUnit;
+
+typedef struct Mv { /* motion vector; used by MvField.mv[] and PredictionUnit.mvd */
+    int16_t x;  ///< horizontal component of motion vector
+    int16_t y;  ///< vertical component of motion vector
+} Mv;
+
+typedef struct MvField { /* element type of HEVCFrame.tab_mvf */
+    DECLARE_ALIGNED(4, Mv, mv)[2]; ///< two vectors; NOTE(review): presumably one per list (L0, L1) - confirm
+    int8_t ref_idx[2];
+    int8_t pred_flag;              ///< NOTE(review): presumably an enum PredFlag value - confirm
+} MvField;
+
+typedef struct NeighbourAvailable { /* kept in HEVCLocalContext.na; written by ff_hevc_set_neighbour_available() */
+    int cand_bottom_left;
+    int cand_left;
+    int cand_up;
+    int cand_up_left;
+    int cand_up_right;
+    int cand_up_right_sap;
+} NeighbourAvailable;
+
+typedef struct PredictionUnit { /* per-PU values; HEVCLocalContext keeps one instance in its `pu` member */
+    int mpm_idx;
+    int rem_intra_luma_pred_mode;
+    uint8_t intra_pred_mode[4];
+    Mv mvd;
+    uint8_t merge_flag;
+    uint8_t intra_pred_mode_c[4];
+    uint8_t chroma_mode_c[4];
+} PredictionUnit;
+
+typedef struct TransformUnit { /* per-TU values; HEVCLocalContext keeps one instance in its `tu` member */
+    int cu_qp_delta;
+
+    int res_scale_val;
+
+    // Inferred parameters
+    int intra_pred_mode;
+    int intra_pred_mode_c;
+    int chroma_mode_c;
+    uint8_t is_cu_qp_delta_coded;
+    uint8_t is_cu_chroma_qp_offset_coded;
+    int8_t  cu_qp_offset_cb;
+    int8_t  cu_qp_offset_cr;
+    uint8_t cross_pf;
+} TransformUnit;
+
+typedef struct DBParams { /* element type behind HEVCContext.deblock; same two names as SliceHeader.beta_offset / .tc_offset */
+    int beta_offset;
+    int tc_offset;
+} DBParams;
+
+#define HEVC_FRAME_FLAG_OUTPUT    (1 << 0) /* the HEVC_FRAME_FLAG_* bits are combined in HEVCFrame.flags */
+#define HEVC_FRAME_FLAG_SHORT_REF (1 << 1)
+#define HEVC_FRAME_FLAG_LONG_REF  (1 << 2)
+#define HEVC_FRAME_FLAG_BUMPING   (1 << 3)
+/* Sequence counters are wrapped with the mask when incremented; INVALID is one past the mask (0x100). */
+#define HEVC_SEQUENCE_COUNTER_MASK 0xff
+#define HEVC_SEQUENCE_COUNTER_INVALID (HEVC_SEQUENCE_COUNTER_MASK + 1)
+
+typedef struct HEVCFrame { /* one picture slot; HEVCContext.DPB[] holds 32 of them */
+    union {
+        struct {
+            AVFrame *f; ///< overlays the start of tf; NOTE(review): presumably ProgressFrame begins with its AVFrame pointer - confirm
+        };
+        ProgressFrame tf;
+    };
+    AVFrame *frame_grain; ///< allocated once per DPB slot by hevc_init_context()
+    int needs_fg; /* 1 if grain needs to be applied by the decoder */
+    MvField *tab_mvf;              ///< RefStruct reference
+    RefPicList *refPicList;
+    RefPicListTab **rpl_tab;       ///< RefStruct reference
+    int ctb_count;
+    int poc;
+
+    RefPicListTab *rpl;            ///< RefStruct reference
+    int nb_rpl_elems;
+
+    void *hwaccel_picture_private; ///< RefStruct reference
+
+    /**
+     * A sequence counter, so that old frames are output first
+     * after a POC reset; 16 bits wide, so HEVC_SEQUENCE_COUNTER_INVALID (0x100) is representable
+     */
+    uint16_t sequence;
+
+    /**
+     * A combination of HEVC_FRAME_FLAG_*
+     */
+    uint8_t flags;
+} HEVCFrame;
+
+typedef struct HEVCLocalContext { /* per-thread decoding state; HEVCContext.local_ctx is an array of these */
+    uint8_t cabac_state[HEVC_CONTEXTS];    ///< same size as HEVCCABACState.state
+
+    uint8_t stat_coeff[HEVC_STAT_COEFFS];  ///< same size as HEVCCABACState.stat_coeff
+
+    uint8_t first_qp_group;
+
+    void *logctx;                          ///< hevc_init_context() stores the AVCodecContext here for local_ctx[0]
+    const struct HEVCContext *parent;      ///< the HEVCContext this local context belongs to
+
+    CABACContext cc;
+
+    /**
+     * This is a pointer to the common CABAC state.
+     * In case entropy_coding_sync_enabled_flag is set,
+     * the CABAC state after decoding the second CTU in a row is
+     * stored here and used to initialize the CABAC state before
+     * decoding the first CTU in the next row.
+     * This is the basis for WPP and in case slice-threading is used,
+     * the next row is decoded by another thread making this state
+     * shared between threads.
+     */
+    HEVCCABACState *common_cabac_state;
+
+    int8_t qp_y;
+    int8_t curr_qp_y;
+
+    int qPy_pred;
+
+    TransformUnit tu;
+
+    uint8_t ctb_left_flag;
+    uint8_t ctb_up_flag;
+    uint8_t ctb_up_right_flag;
+    uint8_t ctb_up_left_flag;
+    int     end_of_tiles_x;
+    int     end_of_tiles_y;
+    /* +7 is for subpixel interpolation, *2 for high bit depths */
+    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
+    /* The extended size between the new edge emu buffer is abused by SAO */
+    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
+    DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE];
+
+    int ct_depth;
+    CodingUnit cu;
+    PredictionUnit pu;
+    NeighbourAvailable na;                 ///< written by ff_hevc_set_neighbour_available()
+
+#define BOUNDARY_LEFT_SLICE     (1 << 0)
+#define BOUNDARY_LEFT_TILE      (1 << 1)
+#define BOUNDARY_UPPER_SLICE    (1 << 2)
+#define BOUNDARY_UPPER_TILE     (1 << 3)
+    /* properties of the boundary of the current CTB for the purposes
+     * of the deblocking filter; a combination of the BOUNDARY_* bits above */
+    int boundary_flags;
+
+    // an array of these structs is used for per-thread state - pad its size
+    // to avoid false sharing
+    char padding[128];
+} HEVCLocalContext;
+
+typedef struct HEVCContext { /* private data of the HEVC decoder (priv_data_size of ff_hevc_decoder) */
+    const AVClass *c;  // needed by private avoptions
+    AVCodecContext *avctx;
+
+    HEVCLocalContext     *local_ctx;    ///< array of nb_local_ctx elements; hevc_init_context() allocates one
+    unsigned           nb_local_ctx;
+
+    uint8_t             threads_type;   ///< FF_THREAD_FRAME or FF_THREAD_SLICE, chosen in hevc_decode_init()
+    uint8_t             threads_number;
+
+    int                 width;
+    int                 height;
+
+    /** 1 if the independent slice segment header was successfully parsed */
+    uint8_t slice_initialized;
+
+    AVFrame *output_frame;
+    uint8_t *sao_pixel_buffer_h[3];
+    uint8_t *sao_pixel_buffer_v[3];
+
+    HEVCParamSets ps;
+    HEVCSEI sei;
+    struct AVMD5 *md5_ctx;
+
+    struct FFRefStructPool *tab_mvf_pool;
+    struct FFRefStructPool *rpl_tab_pool;
+
+    /// candidate references for the current frame (5 lists, the value of NB_RPS_TYPE)
+    RefPicList rps[5];
+
+    SliceHeader sh;
+    SAOParams *sao;
+    DBParams *deblock;
+    enum HEVCNALUnitType nal_unit_type;
+    int temporal_id;  ///< temporal_id_plus1 - 1
+    HEVCFrame *cur_frame;
+    HEVCFrame *collocated_ref;
+    HEVCFrame DPB[32];
+    int poc;
+    int pocTid0;
+    int slice_idx; ///< number of the slice being currently decoded
+    int eos;       ///< current packet contains an EOS/EOB NAL
+    int last_eos;  ///< last packet contains an EOS/EOB NAL
+    int max_ra;
+    int bs_width;
+    int bs_height;
+    int overlap;
+
+    int is_decoded;
+    int no_rasl_output_flag;
+
+    HEVCPredContext hpc;
+    HEVCDSPContext hevcdsp;
+    VideoDSPContext vdsp;
+    BswapDSPContext bdsp;
+    H274FilmGrainDatabase h274db;
+    int8_t *qp_y_tab;
+    uint8_t *horizontal_bs;
+    uint8_t *vertical_bs;
+
+    int32_t *tab_slice_address;
+
+    //  CU
+    uint8_t *skip_flag;
+    uint8_t *tab_ct_depth;
+    // PU
+    uint8_t *tab_ipm;
+
+    uint8_t *cbf_luma; // cbf_luma of colocated TU
+    uint8_t *is_pcm;
+
+    // CTB-level flags affecting loop filter operation
+    uint8_t *filter_slice_edges;
+
+    /** used on BE to byteswap the lines for checksumming */
+    uint8_t *checksum_buf;
+    int      checksum_buf_size;
+
+    /**
+     * Sequence counters for decoded and output frames, so that old
+     * frames are output first after a POC reset
+     */
+    uint16_t seq_decode;
+    uint16_t seq_output;
+
+    /** The target for the common_cabac_state of the local contexts. */
+    HEVCCABACState cabac;
+
+    int enable_parallel_tiles;
+    atomic_int wpp_err;
+
+    const uint8_t *data;
+
+    H2645Packet pkt;
+    // type of the first VCL NAL of the current frame
+    enum HEVCNALUnitType first_nal_type;
+
+    int is_nalff;           ///< this flag is != 0 if bitstream is encapsulated
+                            ///< as a format defined in 14496-15
+    int apply_defdispwin;   ///< target of the "apply_defdispwin" and "strict-displaywin" options
+
+    int nal_length_size;    ///< Number of bytes used for nal length (1, 2 or 4)
+    int nuh_layer_id;
+
+    int film_grain_warning_shown;
+
+    AVBufferRef *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
+    DOVIContext dovi_ctx;       ///< Dolby Vision decoding context
+} HEVCContext;
+
+/**
+ * Mark all frames in DPB as unused for reference.
+ */
+void ff_hevc_clear_refs(HEVCContext *s);
+
+/**
+ * Drop all frames currently in DPB.
+ */
+void ff_hevc_flush_dpb(HEVCContext *s);
+
+const RefPicList *ff_hevc_get_ref_list(const HEVCContext *s, const HEVCFrame *frame,
+                                       int x0, int y0);
+
+/**
+ * Construct the reference picture sets for the current frame.
+ */
+int ff_hevc_frame_rps(HEVCContext *s);
+
+/**
+ * Construct the reference picture list(s) for the current slice.
+ */
+int ff_hevc_slice_rpl(HEVCContext *s);
+
+void ff_hevc_save_states(HEVCLocalContext *lc, int ctb_addr_ts);
+int ff_hevc_cabac_init(HEVCLocalContext *lc, int ctb_addr_ts,
+                       const uint8_t *data, size_t size);
+int ff_hevc_sao_merge_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_sao_type_idx_decode(HEVCLocalContext *lc);
+int ff_hevc_sao_band_position_decode(HEVCLocalContext *lc);
+int ff_hevc_sao_offset_abs_decode(HEVCLocalContext *lc);
+int ff_hevc_sao_offset_sign_decode(HEVCLocalContext *lc);
+int ff_hevc_sao_eo_class_decode(HEVCLocalContext *lc);
+int ff_hevc_end_of_slice_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_cu_transquant_bypass_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_skip_flag_decode(HEVCLocalContext *lc, int x0, int y0,
+                             int x_cb, int y_cb);
+int ff_hevc_pred_mode_decode(HEVCLocalContext *lc);
+int ff_hevc_split_coding_unit_flag_decode(HEVCLocalContext *lc, int ct_depth,
+                                          int x0, int y0);
+int ff_hevc_part_mode_decode(HEVCLocalContext *lc, int log2_cb_size);
+int ff_hevc_pcm_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_prev_intra_luma_pred_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_mpm_idx_decode(HEVCLocalContext *lc);
+int ff_hevc_rem_intra_luma_pred_mode_decode(HEVCLocalContext *lc);
+int ff_hevc_intra_chroma_pred_mode_decode(HEVCLocalContext *lc);
+int ff_hevc_merge_idx_decode(HEVCLocalContext *lc);
+int ff_hevc_merge_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_inter_pred_idc_decode(HEVCLocalContext *lc, int nPbW, int nPbH);
+int ff_hevc_ref_idx_lx_decode(HEVCLocalContext *lc, int num_ref_idx_lx);
+int ff_hevc_mvp_lx_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_no_residual_syntax_flag_decode(HEVCLocalContext *lc);
+int ff_hevc_split_transform_flag_decode(HEVCLocalContext *lc, int log2_trafo_size);
+int ff_hevc_cbf_cb_cr_decode(HEVCLocalContext *lc, int trafo_depth);
+int ff_hevc_cbf_luma_decode(HEVCLocalContext *lc, int trafo_depth);
+int ff_hevc_log2_res_scale_abs(HEVCLocalContext *lc, int idx);
+int ff_hevc_res_scale_sign_flag(HEVCLocalContext *lc, int idx);
+
+/**
+ * Get the number of candidate references for the current frame.
+ */
+int ff_hevc_frame_nb_refs(const HEVCContext *s);
+
+int ff_hevc_set_new_ref(HEVCContext *s, int poc);
+
+static av_always_inline int ff_hevc_nal_is_nonref(enum HEVCNALUnitType type)
+{
+    /* Returns 1 for the eight "_N" NAL unit types tested below (five named
+     * picture types plus VCL_N10/N12/N14) and 0 for every other type.
+     * NOTE(review): presumably these are the sub-layer non-reference
+     * types of the HEVC specification - confirm against hevc.h. */
+
+    return type == HEVC_NAL_TRAIL_N ||
+           type == HEVC_NAL_TSA_N   ||
+           type == HEVC_NAL_STSA_N  ||
+           type == HEVC_NAL_RADL_N  ||
+           type == HEVC_NAL_RASL_N  ||
+           type == HEVC_NAL_VCL_N10 ||
+           type == HEVC_NAL_VCL_N12 ||
+           type == HEVC_NAL_VCL_N14;
+}
+
+/**
+ * Find next frame in output order and put a reference to it in frame.
+ * @return 1 if a frame was output, 0 otherwise
+ */
+int ff_hevc_output_frame(HEVCContext *s, AVFrame *frame, int flush);
+
+void ff_hevc_bump_frame(HEVCContext *s);
+
+void ff_hevc_unref_frame(HEVCFrame *frame, int flags);
+
+void ff_hevc_set_neighbour_available(HEVCLocalContext *lc, int x0, int y0,
+                                     int nPbW, int nPbH);
+void ff_hevc_luma_mv_merge_mode(HEVCLocalContext *lc, int x0, int y0,
+                                int nPbW, int nPbH, int log2_cb_size,
+                                int part_idx, int merge_idx, MvField *mv);
+void ff_hevc_luma_mv_mvp_mode(HEVCLocalContext *lc, int x0, int y0,
+                              int nPbW, int nPbH, int log2_cb_size,
+                              int part_idx, int merge_idx,
+                              MvField *mv, int mvp_lx_flag, int LX);
+void ff_hevc_hls_filter(HEVCLocalContext *lc, int x, int y, int ctb_size);
+void ff_hevc_hls_filters(HEVCLocalContext *lc, int x_ctb, int y_ctb, int ctb_size);
+void ff_hevc_set_qPy(HEVCLocalContext *lc, int xBase, int yBase,
+                     int log2_cb_size);
+void ff_hevc_deblocking_boundary_strengths(HEVCLocalContext *lc, int x0, int y0,
+                                           int log2_trafo_size);
+int ff_hevc_cu_qp_delta_sign_flag(HEVCLocalContext *lc);
+int ff_hevc_cu_qp_delta_abs(HEVCLocalContext *lc);
+int ff_hevc_cu_chroma_qp_offset_flag(HEVCLocalContext *lc);
+int ff_hevc_cu_chroma_qp_offset_idx(HEVCLocalContext *lc);
+void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0,
+                                 int log2_trafo_size, enum ScanType scan_idx,
+                                 int c_idx);
+
+void ff_hevc_hls_mvd_coding(HEVCLocalContext *lc, int x0, int y0, int log2_cb_size);
+
+extern const uint8_t ff_hevc_qpel_extra_before[4];
+extern const uint8_t ff_hevc_qpel_extra_after[4];
+extern const uint8_t ff_hevc_qpel_extra[4];
+
+#endif /* AVCODEC_HEVC_HEVCDEC_H */
diff --git a/libavcodec/hevc/mvs.c b/libavcodec/hevc/mvs.c
new file mode 100644
index 0000000000..b56f0bece5
--- /dev/null
+++ b/libavcodec/hevc/mvs.c
@@ -0,0 +1,775 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2013 Anand Meher Kotra
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hevc.h"
+#include "hevcdec.h"
+#include "progressframe.h"
+
+static const uint8_t l0_l1_cand_idx[12][2] = { // { l0CandIdx, l1CandIdx } per comb_idx: merge-list entries supplying the L0 and the L1 half of a combined candidate
+    { 0, 1, },
+    { 1, 0, },
+    { 0, 2, },
+    { 2, 0, },
+    { 1, 2, },
+    { 2, 1, },
+    { 0, 3, },
+    { 3, 0, },
+    { 1, 3, },
+    { 3, 1, },
+    { 2, 3, },
+    { 3, 2, },
+};
+
+void ff_hevc_set_neighbour_available(HEVCLocalContext *lc, int x0, int y0,
+                                     int nPbW, int nPbH)
+{
+    const HEVCContext *const s = lc->parent;
+    // offsets of the block's top-left sample inside its CTB (0 on a CTB edge)
+    const int x_in_ctb = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
+    const int y_in_ctb = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
+    const int up       = lc->ctb_up_flag   || y_in_ctb;
+    const int left     = lc->ctb_left_flag || x_in_ctb;
+    const int up_right = (x_in_ctb + nPbW == 1 << s->ps.sps->log2_ctb_size) ? lc->ctb_up_right_flag && !y_in_ctb : up;
+
+    lc->na.cand_up           = up;
+    lc->na.cand_left         = left;
+    lc->na.cand_up_left      = (x_in_ctb || y_in_ctb) ? left && up : lc->ctb_up_left_flag;
+    lc->na.cand_up_right_sap = up_right;
+    lc->na.cand_up_right     = up_right && (x0 + nPbW) < lc->end_of_tiles_x;
+    lc->na.cand_bottom_left  = ((y0 + nPbH) >= lc->end_of_tiles_y) ? 0 : left;
+}
+
+/*
+ * 6.4.1 Derivation process for z-scan order block availability: returns 1 if (xN, yN) lies in a CTB row above or a CTB column left of (xCurr, yCurr), else whether its min_tb_addr_zs entry is <= the current one
+ */
+static av_always_inline int z_scan_block_avail(const HEVCContext *s, int xCurr, int yCurr,
+                              int xN, int yN)
+{
+#define MIN_TB_ADDR_ZS(x, y)                                            \
+    s->ps.pps->min_tb_addr_zs[(y) * (s->ps.sps->tb_mask+2) + (x)]
+
+    int xCurr_ctb = xCurr >> s->ps.sps->log2_ctb_size;
+    int yCurr_ctb = yCurr >> s->ps.sps->log2_ctb_size;
+    int xN_ctb    = xN    >> s->ps.sps->log2_ctb_size;
+    int yN_ctb    = yN    >> s->ps.sps->log2_ctb_size;
+    if( yN_ctb < yCurr_ctb || xN_ctb < xCurr_ctb ) // neighbour sits in an earlier CTB row or column
+        return 1;
+    else {
+        // both positions are converted to min-TB units and masked with tb_mask before the table lookup
+        int Curr = MIN_TB_ADDR_ZS((xCurr >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask,
+                (yCurr >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask);
+        int N    = MIN_TB_ADDR_ZS((xN >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask,
+                (yN >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask);
+        return N <= Curr;
+    }
+}
+
+// Despite its name, returns non-zero when both luma locations fall into the SAME merge estimation region
+static av_always_inline int is_diff_mer(const HEVCContext *s, int xN, int yN, int xP, int yP)
+{
+    const uint8_t mer_shift = s->ps.pps->log2_parallel_merge_level;
+    const int same_column   = (xN >> mer_shift) == (xP >> mer_shift);
+
+    return same_column && (yN >> mer_shift) == (yP >> mer_shift);
+}
+
+#define MATCH_MV(x) (AV_RN32A(&A.x) == AV_RN32A(&B.x))
+#define MATCH(x) (A.x == B.x)
+
+// Non-zero iff A and B have the same pred_flag and equal ref_idx and mv in every list that flag uses
+static av_always_inline int compare_mv_ref_idx(struct MvField A, struct MvField B)
+{
+    if (A.pred_flag != B.pred_flag)
+        return 0;
+
+    switch (A.pred_flag) {
+    case PF_BI:
+        return MATCH(ref_idx[0]) && MATCH_MV(mv[0]) &&
+               MATCH(ref_idx[1]) && MATCH_MV(mv[1]);
+    case PF_L0:
+        return MATCH(ref_idx[0]) && MATCH_MV(mv[0]);
+    case PF_L1:
+        return MATCH(ref_idx[1]) && MATCH_MV(mv[1]);
+    }
+    return 0;
+}
+
+static av_always_inline void mv_scale(Mv *dst, const Mv *src, int td, int tb)
+{
+    // td and tb are clipped to int8; td must be non-zero since it is the divisor
+    const int td_c   = av_clip_int8(td);
+    const int tb_c   = av_clip_int8(tb);
+    const int tx     = (0x4000 + abs(td_c / 2)) / td_c;
+    const int factor = av_clip_intp2((tb_c * tx + 32) >> 6, 12);
+    const int prod_x = factor * src->x;
+    const int prod_y = factor * src->y;
+
+    dst->x = av_clip_int16((prod_x + 127 + (prod_x < 0)) >> 8);
+    dst->y = av_clip_int16((prod_y + 127 + (prod_y < 0)) >> 8);
+}
+
+static int check_mvset(Mv *mvLXCol, const Mv *mvCol,
+                       int colPic, int poc,
+                       const RefPicList *refPicList, int X, int refIdxLx,
+                       const RefPicList *refPicList_col, int listCol, int refidxCol)
+{
+    const RefPicList *const cur_rpl = &refPicList[X];
+    const RefPicList *const col_rpl = &refPicList_col[listCol];
+    const int cur_is_lt = cur_rpl->isLongTerm[refIdxLx];
+    int col_dist, cur_dist;
+
+    // a long-term/short-term mismatch yields a zero vector and no candidate
+    if (cur_is_lt != col_rpl->isLongTerm[refidxCol]) {
+        mvLXCol->x = 0;
+        mvLXCol->y = 0;
+        return 0;
+    }
+
+    col_dist = colPic - col_rpl->list[refidxCol];
+    cur_dist = poc    - cur_rpl->list[refIdxLx];
+    // rescale only short-term vectors whose non-zero distance differs from ours
+    if (!cur_is_lt && col_dist && col_dist != cur_dist)
+        mv_scale(mvLXCol, mvCol, col_dist, cur_dist);
+    else
+        *mvLXCol = *mvCol;
+    return 1;
+}
+
+#define CHECK_MVSET(l)                                          \
+    check_mvset(mvLXCol, temp_col.mv + l,                       \
+                colPic, s->poc,                                 \
+                refPicList, X, refIdxLx,                        \
+                refPicList_col, L ## l, temp_col.ref_idx[l])
+// CHECK_MVSET(l): run check_mvset() on list l (a literal 0 or 1) of temp_col, using the locals of derive_temporal_colocated_mvs()
+// Section 8.5.3.1.8: derive mvLXCol from the collocated motion field temp_col; returns 1 if a vector was produced, else 0
+static int derive_temporal_colocated_mvs(const HEVCContext *s, MvField temp_col,
+                                         int refIdxLx, Mv *mvLXCol, int X,
+                                         int colPic, const RefPicList *refPicList_col)
+{
+    const RefPicList *refPicList = s->cur_frame->refPicList;
+
+    if (temp_col.pred_flag == PF_INTRA) // intra collocated block: no candidate
+        return 0;
+
+    if (!(temp_col.pred_flag & PF_L0)) // L0 bit not set: take the list 1 motion
+        return CHECK_MVSET(1);
+    else if (temp_col.pred_flag == PF_L0)
+        return CHECK_MVSET(0);
+    else if (temp_col.pred_flag == PF_BI) {
+        int check_diffpicount = 0; // number of current reference lists holding an entry greater than s->poc
+        int i, j;
+        for (j = 0; j < 2; j++) {
+            for (i = 0; i < refPicList[j].nb_refs; i++) {
+                if (refPicList[j].list[i] > s->poc) {
+                    check_diffpicount++;
+                    break;
+                }
+            }
+        }
+        if (!check_diffpicount) { // no entry is greater than s->poc: use the collocated motion of list X
+            if (X==0)
+                return CHECK_MVSET(0);
+            else
+                return CHECK_MVSET(1);
+        } else { // otherwise use the list opposite to s->sh.collocated_list
+            if (s->sh.collocated_list == L1)
+                return CHECK_MVSET(0);
+            else
+                return CHECK_MVSET(1);
+        }
+    }
+
+    return 0;
+}
+
+#define TAB_MVF(x, y)           /* MvField at min-PU column x, row y */  \
+    tab_mvf[(y) * min_pu_width + (x)]
+
+#define TAB_MVF_PU(v)           /* MvField covering luma sample (x<v>, y<v>) */ \
+    TAB_MVF(((x ## v) >> s->ps.sps->log2_min_pu_size),                     \
+            ((y ## v) >> s->ps.sps->log2_min_pu_size))
+
+#define DERIVE_TEMPORAL_COLOCATED_MVS                                   \
+    derive_temporal_colocated_mvs(s, temp_col,                          \
+                                  refIdxLx, mvLXCol, X, colPic,         \
+                                  ff_hevc_get_ref_list(s, ref, x, y))
+
+/*
+ * 8.5.3.1.7  temporal luma motion vector prediction: tries the bottom-right, then the centre collocated position in s->collocated_ref; returns 1 and fills *mvLXCol when a vector is found
+ */
+static int temporal_luma_motion_vector(const HEVCContext *s, int x0, int y0,
+                                       int nPbW, int nPbH, int refIdxLx,
+                                       Mv *mvLXCol, int X)
+{
+    const MvField *tab_mvf;
+    MvField temp_col;
+    int x, y, x_pu, y_pu;
+    int min_pu_width = s->ps.sps->min_pu_width;
+    int availableFlagLXCol = 0;
+    int colPic;
+
+    const HEVCFrame *ref = s->collocated_ref;
+
+    if (!ref) { // no collocated picture: zero the output and report "not available"
+        memset(mvLXCol, 0, sizeof(*mvLXCol));
+        return 0;
+    }
+
+    tab_mvf = ref->tab_mvf;
+    colPic  = ref->poc;
+
+    //bottom right collocated motion vector
+    x = x0 + nPbW;
+    y = y0 + nPbH;
+
+    if (tab_mvf &&
+        (y0 >> s->ps.sps->log2_ctb_size) == (y >> s->ps.sps->log2_ctb_size) && // must stay in the current CTB row
+        y < s->ps.sps->height &&
+        x < s->ps.sps->width) {
+        x                 &= ~15; // round both coordinates down to a multiple of 16
+        y                 &= ~15;
+        if (s->threads_type == FF_THREAD_FRAME)
+            ff_progress_frame_await(&ref->tf, y); // presumably waits until ref has progressed to row y -- confirm in progressframe.h
+        x_pu               = x >> s->ps.sps->log2_min_pu_size;
+        y_pu               = y >> s->ps.sps->log2_min_pu_size;
+        temp_col           = TAB_MVF(x_pu, y_pu);
+        availableFlagLXCol = DERIVE_TEMPORAL_COLOCATED_MVS;
+    }
+
+    // derive center collocated motion vector (only when the bottom-right position gave nothing)
+    if (tab_mvf && !availableFlagLXCol) {
+        x                  = x0 + (nPbW >> 1);
+        y                  = y0 + (nPbH >> 1);
+        x                 &= ~15;
+        y                 &= ~15;
+        if (s->threads_type == FF_THREAD_FRAME)
+            ff_progress_frame_await(&ref->tf, y);
+        x_pu               = x >> s->ps.sps->log2_min_pu_size;
+        y_pu               = y >> s->ps.sps->log2_min_pu_size;
+        temp_col           = TAB_MVF(x_pu, y_pu);
+        availableFlagLXCol = DERIVE_TEMPORAL_COLOCATED_MVS;
+    }
+    return availableFlagLXCol;
+}
+
+#define AVAILABLE(cand, v)      /* cand is set and the PU at (x<v>, y<v>) is not intra */ \
+    ((cand) && !(TAB_MVF_PU(v).pred_flag == PF_INTRA))
+
+#define PRED_BLOCK_AVAILABLE(v)                                 \
+    z_scan_block_avail(s, x0, y0, x ## v, y ## v)
+
+#define COMPARE_MV_REFIDX(a, b)                                 \
+    compare_mv_ref_idx(TAB_MVF_PU(a), TAB_MVF_PU(b))
+
+/*
+ * 8.5.3.1.2  Derivation process for spatial merging candidates; also appends the temporal, combined bi-predictive and zero candidates, and returns as soon as entry merge_idx of mergecandlist is filled (log2_cb_size is unused)
+ */
+static void derive_spatial_merge_candidates(HEVCLocalContext *lc, const HEVCContext *s,
+                                            int x0, int y0,
+                                            int nPbW, int nPbH,
+                                            int log2_cb_size,
+                                            int singleMCLFlag, int part_idx,
+                                            int merge_idx,
+                                            struct MvField mergecandlist[])
+{
+    const RefPicList *refPicList = s->cur_frame->refPicList;
+    const MvField *tab_mvf       = s->cur_frame->tab_mvf;
+
+    const int min_pu_width = s->ps.sps->min_pu_width;
+
+    const int cand_bottom_left = lc->na.cand_bottom_left;
+    const int cand_left        = lc->na.cand_left;
+    const int cand_up_left     = lc->na.cand_up_left;
+    const int cand_up          = lc->na.cand_up;
+    const int cand_up_right    = lc->na.cand_up_right_sap; // the _sap flag, i.e. without the end_of_tiles_x test
+
+    const int xA1    = x0 - 1;
+    const int yA1    = y0 + nPbH - 1;
+
+    const int xB1    = x0 + nPbW - 1;
+    const int yB1    = y0 - 1;
+
+    const int xB0    = x0 + nPbW;
+    const int yB0    = y0 - 1;
+
+    const int xA0    = x0 - 1;
+    const int yA0    = y0 + nPbH;
+
+    const int xB2    = x0 - 1;
+    const int yB2    = y0 - 1;
+
+    const int nb_refs = (s->sh.slice_type == HEVC_SLICE_P) ?
+                        s->sh.nb_refs[0] : FFMIN(s->sh.nb_refs[0], s->sh.nb_refs[1]); // bounds ref_idx of the zero candidates
+
+    int zero_idx = 0;
+
+    int nb_merge_cand = 0;
+    int nb_orig_merge_cand = 0;
+
+    int is_available_a0;
+    int is_available_a1;
+    int is_available_b0;
+    int is_available_b1;
+    int is_available_b2;
+
+
+    if (!singleMCLFlag && part_idx == 1 && // left candidate A1; note that && binds tighter than the || below
+        (lc->cu.part_mode == PART_Nx2N ||
+         lc->cu.part_mode == PART_nLx2N ||
+         lc->cu.part_mode == PART_nRx2N) ||
+        is_diff_mer(s, xA1, yA1, x0, y0)) { // is_diff_mer() is non-zero when both positions share a merge region
+        is_available_a1 = 0;
+    } else {
+        is_available_a1 = AVAILABLE(cand_left, A1);
+        if (is_available_a1) {
+            mergecandlist[nb_merge_cand] = TAB_MVF_PU(A1);
+            if (merge_idx == 0)
+                return;
+            nb_merge_cand++;
+        }
+    }
+
+    if (!singleMCLFlag && part_idx == 1 && // above candidate B1
+        (lc->cu.part_mode == PART_2NxN ||
+         lc->cu.part_mode == PART_2NxnU ||
+         lc->cu.part_mode == PART_2NxnD) ||
+        is_diff_mer(s, xB1, yB1, x0, y0)) {
+        is_available_b1 = 0;
+    } else {
+        is_available_b1 = AVAILABLE(cand_up, B1);
+        if (is_available_b1 &&
+            !(is_available_a1 && COMPARE_MV_REFIDX(B1, A1))) { // drop B1 when it duplicates A1
+            mergecandlist[nb_merge_cand] = TAB_MVF_PU(B1);
+            if (merge_idx == nb_merge_cand)
+                return;
+            nb_merge_cand++;
+        }
+    }
+
+    // above right spatial merge candidate
+    is_available_b0 = AVAILABLE(cand_up_right, B0) &&
+                      xB0 < s->ps.sps->width &&
+                      PRED_BLOCK_AVAILABLE(B0) &&
+                      !is_diff_mer(s, xB0, yB0, x0, y0);
+
+    if (is_available_b0 &&
+        !(is_available_b1 && COMPARE_MV_REFIDX(B0, B1))) {
+        mergecandlist[nb_merge_cand] = TAB_MVF_PU(B0);
+        if (merge_idx == nb_merge_cand)
+            return;
+        nb_merge_cand++;
+    }
+
+    // left bottom spatial merge candidate
+    is_available_a0 = AVAILABLE(cand_bottom_left, A0) &&
+                      yA0 < s->ps.sps->height &&
+                      PRED_BLOCK_AVAILABLE(A0) &&
+                      !is_diff_mer(s, xA0, yA0, x0, y0);
+
+    if (is_available_a0 &&
+        !(is_available_a1 && COMPARE_MV_REFIDX(A0, A1))) {
+        mergecandlist[nb_merge_cand] = TAB_MVF_PU(A0);
+        if (merge_idx == nb_merge_cand)
+            return;
+        nb_merge_cand++;
+    }
+
+    // above left spatial merge candidate
+    is_available_b2 = AVAILABLE(cand_up_left, B2) &&
+                      !is_diff_mer(s, xB2, yB2, x0, y0);
+
+    if (is_available_b2 &&
+        !(is_available_a1 && COMPARE_MV_REFIDX(B2, A1)) &&
+        !(is_available_b1 && COMPARE_MV_REFIDX(B2, B1)) &&
+        nb_merge_cand != 4) { // B2 is only used while fewer than four spatial candidates were found
+        mergecandlist[nb_merge_cand] = TAB_MVF_PU(B2);
+        if (merge_idx == nb_merge_cand)
+            return;
+        nb_merge_cand++;
+    }
+
+    // temporal motion vector candidate
+    if (s->sh.slice_temporal_mvp_enabled_flag &&
+        nb_merge_cand < s->sh.max_num_merge_cand) {
+        Mv mv_l0_col = { 0 }, mv_l1_col = { 0 };
+        int available_l0 = temporal_luma_motion_vector(s, x0, y0, nPbW, nPbH,
+                                                       0, &mv_l0_col, 0);
+        int available_l1 = (s->sh.slice_type == HEVC_SLICE_B) ?
+                           temporal_luma_motion_vector(s, x0, y0, nPbW, nPbH,
+                                                       0, &mv_l1_col, 1) : 0;
+
+        if (available_l0 || available_l1) {
+            mergecandlist[nb_merge_cand].pred_flag = available_l0 + (available_l1 << 1); // bit 0: list 0 used, bit 1: list 1 used
+            AV_ZERO16(mergecandlist[nb_merge_cand].ref_idx);
+            mergecandlist[nb_merge_cand].mv[0]      = mv_l0_col;
+            mergecandlist[nb_merge_cand].mv[1]      = mv_l1_col;
+
+            if (merge_idx == nb_merge_cand)
+                return;
+            nb_merge_cand++;
+        }
+    }
+
+    nb_orig_merge_cand = nb_merge_cand;
+
+    // combined bi-predictive merge candidates  (applies for B slices)
+    if (s->sh.slice_type == HEVC_SLICE_B && nb_orig_merge_cand > 1 &&
+        nb_orig_merge_cand < s->sh.max_num_merge_cand) {
+        int comb_idx = 0;
+
+        for (comb_idx = 0; nb_merge_cand < s->sh.max_num_merge_cand &&
+                           comb_idx < nb_orig_merge_cand * (nb_orig_merge_cand - 1); comb_idx++) {
+            int l0_cand_idx = l0_l1_cand_idx[comb_idx][0];
+            int l1_cand_idx = l0_l1_cand_idx[comb_idx][1];
+            MvField l0_cand = mergecandlist[l0_cand_idx];
+            MvField l1_cand = mergecandlist[l1_cand_idx];
+
+            // combine only if the two halves differ in reference list entry or in motion vector
+            if ((l0_cand.pred_flag & PF_L0) && (l1_cand.pred_flag & PF_L1) &&
+                (refPicList[0].list[l0_cand.ref_idx[0]] !=
+                 refPicList[1].list[l1_cand.ref_idx[1]] ||
+                 AV_RN32A(&l0_cand.mv[0]) != AV_RN32A(&l1_cand.mv[1]))) {
+                mergecandlist[nb_merge_cand].ref_idx[0]   = l0_cand.ref_idx[0];
+                mergecandlist[nb_merge_cand].ref_idx[1]   = l1_cand.ref_idx[1];
+                mergecandlist[nb_merge_cand].pred_flag    = PF_BI;
+                AV_COPY32(&mergecandlist[nb_merge_cand].mv[0], &l0_cand.mv[0]);
+                AV_COPY32(&mergecandlist[nb_merge_cand].mv[1], &l1_cand.mv[1]);
+                if (merge_idx == nb_merge_cand)
+                    return;
+                nb_merge_cand++;
+            }
+        }
+    }
+
+    // append Zero motion vector candidates; ref_idx counts up with zero_idx and falls back to 0 once it reaches nb_refs
+    while (nb_merge_cand < s->sh.max_num_merge_cand) {
+        mergecandlist[nb_merge_cand].pred_flag    = PF_L0 + ((s->sh.slice_type == HEVC_SLICE_B) << 1);
+        AV_ZERO32(mergecandlist[nb_merge_cand].mv + 0);
+        AV_ZERO32(mergecandlist[nb_merge_cand].mv + 1);
+        mergecandlist[nb_merge_cand].ref_idx[0]   = zero_idx < nb_refs ? zero_idx : 0;
+        mergecandlist[nb_merge_cand].ref_idx[1]   = zero_idx < nb_refs ? zero_idx : 0;
+
+        if (merge_idx == nb_merge_cand)
+            return;
+        nb_merge_cand++;
+        zero_idx++;
+    }
+}
+
+/*
+ * 8.5.3.1.1 Derivation process of luma Mvs for merge mode:
+ * builds the merge candidate list up to merge_idx and stores that entry in *mv.
+ */
+void ff_hevc_luma_mv_merge_mode(HEVCLocalContext *lc, int x0, int y0, int nPbW,
+                                int nPbH, int log2_cb_size, int part_idx,
+                                int merge_idx, MvField *mv)
+{
+    const HEVCContext *const s = lc->parent;
+    const int cb_size = 1 << log2_cb_size;
+    const int orig_pu_sum_is_12 = (nPbW + nPbH) == 12;
+    // merge level above 2 and an 8x8 CB: derive the list for the whole CB instead of the PU
+    const int single_mcl = s->ps.pps->log2_parallel_merge_level > 2 && cb_size == 8;
+    MvField candidates[MRG_MAX_NUM_CANDS];
+    MvField *chosen = &candidates[merge_idx];
+
+    if (single_mcl) {
+        x0       = lc->cu.x;
+        y0       = lc->cu.y;
+        nPbW     = cb_size;
+        nPbH     = cb_size;
+        part_idx = 0;
+    }
+
+    ff_hevc_set_neighbour_available(lc, x0, y0, nPbW, nPbH);
+    derive_spatial_merge_candidates(lc, s, x0, y0, nPbW, nPbH, log2_cb_size,
+                                    single_mcl, part_idx,
+                                    merge_idx, candidates);
+
+    // a bi-predicted result is reduced to list 0 when the signalled nPbW + nPbH is 12
+    if (chosen->pred_flag == PF_BI && orig_pu_sum_is_12)
+        chosen->pred_flag = PF_L0;
+
+    *mv = *chosen;
+}
+
+static av_always_inline void dist_scale(const HEVCContext *s, Mv *mv,
+                                        int min_pu_width, int x, int y,
+                                        int elist, int ref_idx_curr, int ref_idx)
+{
+    const RefPicList *const rpl  = s->cur_frame->refPicList;
+    const MvField *const tab_mvf = s->cur_frame->tab_mvf;
+    const int neighbour_ref = rpl[elist].list[TAB_MVF(x, y).ref_idx[elist]];
+    const int target_ref    = rpl[ref_idx_curr].list[ref_idx];
+    int td;
+
+    if (neighbour_ref == target_ref)
+        return;
+    // mv_scale() divides by td, so a zero distance is replaced by 1
+    td = s->poc - neighbour_ref;
+    mv_scale(mv, mv, td ? td : 1, s->poc - target_ref);
+}
+
+static int mv_mp_mode_mx(const HEVCContext *s, int x, int y, int pred_flag_index,
+                         Mv *mv, int ref_idx_curr, int ref_idx)
+{
+    const RefPicList *const rpl  = s->cur_frame->refPicList;
+    const MvField *const tab_mvf = s->cur_frame->tab_mvf;
+    const int min_pu_width       = s->ps.sps->min_pu_width;
+    const MvField *const cand    = &TAB_MVF(x, y);
+    // usable only if the neighbour uses list pred_flag_index and its reference entry equals the targeted one
+    if (!(cand->pred_flag & (1 << pred_flag_index)))
+        return 0;
+    if (rpl[pred_flag_index].list[cand->ref_idx[pred_flag_index]] != rpl[ref_idx_curr].list[ref_idx])
+        return 0;
+    *mv = cand->mv[pred_flag_index];
+    return 1;
+}
+
+static int mv_mp_mode_mx_lt(const HEVCContext *s, int x, int y, int pred_flag_index,
+                            Mv *mv, int ref_idx_curr, int ref_idx)
+{
+    const RefPicList *const rpl  = s->cur_frame->refPicList;
+    const MvField *const tab_mvf = s->cur_frame->tab_mvf;
+    const int min_pu_width       = s->ps.sps->min_pu_width;
+    const MvField *const cand    = &TAB_MVF(x, y);
+    int cur_is_lt, cand_is_lt;
+
+    if (!(cand->pred_flag & (1 << pred_flag_index)))
+        return 0;
+
+    cur_is_lt  = rpl[ref_idx_curr].isLongTerm[ref_idx];
+    cand_is_lt = rpl[pred_flag_index].isLongTerm[cand->ref_idx[pred_flag_index]];
+    if (cand_is_lt != cur_is_lt)
+        return 0;
+
+    *mv = cand->mv[pred_flag_index];
+    // only short-term vectors are rescaled to the target reference
+    if (!cur_is_lt)
+        dist_scale(s, mv, min_pu_width, x, y,
+                   pred_flag_index, ref_idx_curr, ref_idx);
+    return 1;
+}
+
+#define MP_MX(v, pred, mx)                                      \
+    mv_mp_mode_mx(s,                                            \
+                  (x ## v) >> s->ps.sps->log2_min_pu_size,         \
+                  (y ## v) >> s->ps.sps->log2_min_pu_size,         \
+                  pred, &mx, ref_idx_curr, ref_idx)
+/* MP_MX / MP_MX_LT: test neighbour v on list pred with mv_mp_mode_mx() / mv_mp_mode_mx_lt(); on success the vector is stored in mx */
+#define MP_MX_LT(v, pred, mx)                                   \
+    mv_mp_mode_mx_lt(s,                                         \
+                     (x ## v) >> s->ps.sps->log2_min_pu_size,      \
+                     (y ## v) >> s->ps.sps->log2_min_pu_size,      \
+                     pred, &mx, ref_idx_curr, ref_idx)
+/* Luma MV prediction for list LX: picks predictor mvp_lx_flag out of up to two spatial/temporal candidates and writes it to mv->mv[LX]. log2_cb_size, part_idx and merge_idx are unused. Reads lc->na, so the availability flags must already be set. */
+void ff_hevc_luma_mv_mvp_mode(HEVCLocalContext *lc, int x0, int y0, int nPbW,
+                              int nPbH, int log2_cb_size, int part_idx,
+                              int merge_idx, MvField *mv,
+                              int mvp_lx_flag, int LX)
+{
+    const HEVCContext *const s = lc->parent;
+    const MvField *const tab_mvf = s->cur_frame->tab_mvf;
+    int isScaledFlag_L0 = 0; // set when A0 or A1 is available
+    int availableFlagLXA0 = 1; // cleared if no A candidate matches
+    int availableFlagLXB0 = 1; // cleared if no B candidate matches
+    int numMVPCandLX = 0;
+    int min_pu_width = s->ps.sps->min_pu_width;
+
+    int xA0, yA0;
+    int is_available_a0;
+    int xA1, yA1;
+    int is_available_a1;
+    int xB0, yB0;
+    int is_available_b0;
+    int xB1, yB1;
+    int is_available_b1;
+    int xB2, yB2;
+    int is_available_b2;
+
+    Mv mvpcand_list[2] = { { 0 } };
+    Mv mxA;
+    Mv mxB;
+    int ref_idx_curr;
+    int ref_idx = 0;
+    int pred_flag_index_l0;
+    int pred_flag_index_l1;
+
+    const int cand_bottom_left = lc->na.cand_bottom_left;
+    const int cand_left        = lc->na.cand_left;
+    const int cand_up_left     = lc->na.cand_up_left;
+    const int cand_up          = lc->na.cand_up;
+    const int cand_up_right    = lc->na.cand_up_right_sap;
+    ref_idx_curr       = LX;
+    ref_idx            = mv->ref_idx[LX];
+    pred_flag_index_l0 = LX; // the target list is tried first ...
+    pred_flag_index_l1 = !LX; // ... then the other list
+
+    // left bottom spatial candidate
+    xA0 = x0 - 1;
+    yA0 = y0 + nPbH;
+
+    is_available_a0 = AVAILABLE(cand_bottom_left, A0) &&
+                      yA0 < s->ps.sps->height &&
+                      PRED_BLOCK_AVAILABLE(A0);
+
+    // left spatial candidate
+    xA1    = x0 - 1;
+    yA1    = y0 + nPbH - 1;
+
+    is_available_a1 = AVAILABLE(cand_left, A1);
+    if (is_available_a0 || is_available_a1)
+        isScaledFlag_L0 = 1;
+
+    if (is_available_a0) { // first pass over A0, A1: the reference entry must match exactly
+        if (MP_MX(A0, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX(A0, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
+    }
+
+    if (is_available_a1) {
+        if (MP_MX(A1, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX(A1, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
+    }
+
+    if (is_available_a0) { // second pass over A0, A1 with mv_mp_mode_mx_lt(), which may rescale the vector
+        if (MP_MX_LT(A0, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX_LT(A0, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
+    }
+
+    if (is_available_a1) {
+        if (MP_MX_LT(A1, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX_LT(A1, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
+    }
+    availableFlagLXA0 = 0;
+
+b_candidates:
+    // B candidates
+    // above right spatial candidate
+    xB0    = x0 + nPbW;
+    yB0    = y0 - 1;
+
+    is_available_b0 =  AVAILABLE(cand_up_right, B0) &&
+                       xB0 < s->ps.sps->width &&
+                       PRED_BLOCK_AVAILABLE(B0);
+
+    // above spatial candidate
+    xB1    = x0 + nPbW - 1;
+    yB1    = y0 - 1;
+    is_available_b1 = AVAILABLE(cand_up, B1);
+
+    // above left spatial candidate
+    xB2 = x0 - 1;
+    yB2 = y0 - 1;
+    is_available_b2 = AVAILABLE(cand_up_left, B2);
+
+    // above right spatial candidate, exact reference match
+    if (is_available_b0) {
+        if (MP_MX(B0, pred_flag_index_l0, mxB)) {
+            goto scalef;
+        }
+        if (MP_MX(B0, pred_flag_index_l1, mxB)) {
+            goto scalef;
+        }
+    }
+
+    // above spatial candidate, exact reference match
+    if (is_available_b1) {
+        if (MP_MX(B1, pred_flag_index_l0, mxB)) {
+            goto scalef;
+        }
+        if (MP_MX(B1, pred_flag_index_l1, mxB)) {
+            goto scalef;
+        }
+    }
+
+    // above left spatial candidate, exact reference match
+    if (is_available_b2) {
+        if (MP_MX(B2, pred_flag_index_l0, mxB)) {
+            goto scalef;
+        }
+        if (MP_MX(B2, pred_flag_index_l1, mxB)) {
+            goto scalef;
+        }
+    }
+    availableFlagLXB0 = 0;
+
+scalef:
+    if (!isScaledFlag_L0) { // neither A0 nor A1 was available
+        if (availableFlagLXB0) { // the exact-match B vector takes the A slot
+            availableFlagLXA0 = 1;
+            mxA = mxB;
+        }
+        availableFlagLXB0 = 0;
+
+        // search B0, B1, B2 again with mv_mp_mode_mx_lt(), stopping at the first hit
+        if (is_available_b0) {
+            availableFlagLXB0 = MP_MX_LT(B0, pred_flag_index_l0, mxB);
+            if (!availableFlagLXB0)
+                availableFlagLXB0 = MP_MX_LT(B0, pred_flag_index_l1, mxB);
+        }
+
+        if (is_available_b1 && !availableFlagLXB0) {
+            availableFlagLXB0 = MP_MX_LT(B1, pred_flag_index_l0, mxB);
+            if (!availableFlagLXB0)
+                availableFlagLXB0 = MP_MX_LT(B1, pred_flag_index_l1, mxB);
+        }
+
+        if (is_available_b2 && !availableFlagLXB0) {
+            availableFlagLXB0 = MP_MX_LT(B2, pred_flag_index_l0, mxB);
+            if (!availableFlagLXB0)
+                availableFlagLXB0 = MP_MX_LT(B2, pred_flag_index_l1, mxB);
+        }
+    }
+
+    if (availableFlagLXA0)
+        mvpcand_list[numMVPCandLX++] = mxA;
+
+    if (availableFlagLXB0 && (!availableFlagLXA0 || mxA.x != mxB.x || mxA.y != mxB.y)) // skip B when it duplicates A
+        mvpcand_list[numMVPCandLX++] = mxB;
+
+    //temporal motion vector prediction candidate, derived only when it would land in the selected slot
+    if (numMVPCandLX < 2 && s->sh.slice_temporal_mvp_enabled_flag &&
+        mvp_lx_flag == numMVPCandLX) {
+        Mv mv_col;
+        int available_col = temporal_luma_motion_vector(s, x0, y0, nPbW,
+                                                        nPbH, ref_idx,
+                                                        &mv_col, LX);
+        if (available_col)
+            mvpcand_list[numMVPCandLX++] = mv_col;
+    }
+
+    mv->mv[LX] = mvpcand_list[mvp_lx_flag]; // slots that were never filled keep their zero initialiser
+}
diff --git a/libavcodec/hevc/parse.c b/libavcodec/hevc/parse.c
new file mode 100644
index 0000000000..53b040d964
--- /dev/null
+++ b/libavcodec/hevc/parse.c
@@ -0,0 +1,147 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "bytestream.h"
+#include "h2645_parse.h"
+#include "hevc.h"
+#include "parse.h"
+
+/*
+ * Split buf into NAL units and decode the parameter sets (VPS, SPS, PPS) and
+ * SEI NAL units with nuh_layer_id 0 into ps and sei.
+ *
+ * NAL units with nuh_layer_id > 0 are skipped and any other NAL type is only
+ * logged at verbose level.
+ *
+ * Returns the last status code when err_recognition has AV_EF_EXPLODE set,
+ * otherwise always 0, i.e. failures are then not reported to the caller.
+ */
+static int hevc_decode_nal_units(const uint8_t *buf, int buf_size, HEVCParamSets *ps,
+                                 HEVCSEI *sei, int is_nalff, int nal_length_size,
+                                 int err_recognition, int apply_defdispwin, void *logctx)
+{
+    H2645Packet pkt = { 0 };
+    int idx;
+    int ret;
+
+    ret = ff_h2645_packet_split(&pkt, buf, buf_size, logctx, is_nalff,
+                                nal_length_size, AV_CODEC_ID_HEVC, 1, 0);
+
+    /* stop at the first NAL unit that fails to decode (or if splitting failed) */
+    for (idx = 0; ret >= 0 && idx < pkt.nb_nals; idx++) {
+        H2645NAL *const nal = &pkt.nals[idx];
+
+        /* layers other than 0 are not handled here */
+        if (nal->nuh_layer_id > 0)
+            continue;
+
+        /* parameter sets and SEI are decoded, everything else is only logged */
+        switch (nal->type) {
+        case HEVC_NAL_VPS:
+            ret = ff_hevc_decode_nal_vps(&nal->gb, logctx, ps);
+            break;
+        case HEVC_NAL_SPS:
+            ret = ff_hevc_decode_nal_sps(&nal->gb, logctx, ps, apply_defdispwin);
+            break;
+        case HEVC_NAL_PPS:
+            ret = ff_hevc_decode_nal_pps(&nal->gb, logctx, ps);
+            break;
+        case HEVC_NAL_SEI_PREFIX:
+        case HEVC_NAL_SEI_SUFFIX:
+            ret = ff_hevc_decode_nal_sei(&nal->gb, logctx, sei, ps, nal->type);
+            break;
+        default:
+            av_log(logctx, AV_LOG_VERBOSE, "Ignoring NAL type %d in extradata\n", nal->type);
+            break;
+        }
+    }
+
+    ff_h2645_packet_uninit(&pkt);
+
+    /* errors are only propagated when AV_EF_EXPLODE is requested */
+    return (err_recognition & AV_EF_EXPLODE) ? ret : 0;
+}
+/* Decode the parameter sets and SEI in extradata into ps and sei. Sets *is_nalff to 1 for hvcC-formatted data (and then *nal_length_size from the record), else to 0. Returns < 0 on failure. */
+int ff_hevc_decode_extradata(const uint8_t *data, int size, HEVCParamSets *ps,
+                             HEVCSEI *sei, int *is_nalff, int *nal_length_size,
+                             int err_recognition, int apply_defdispwin, void *logctx)
+{
+    int ret = 0;
+    GetByteContext gb;
+
+    bytestream2_init(&gb, data, size);
+
+    /* data[0] == 1 is configurationVersion from 14496-15.
+     * data[0] == 0 is accepted for backward compatibility with data that
+     * predates the standard.
+     * Minimum number of bytes of hvcC with 0 numOfArrays is 23.
+     */
+    if (size >= 23 && ((data[0] == 1) || (data[0] == 0 && (data[1] || data[2] > 1)))) {
+        /* It seems the extradata is encoded as hvcC format. */
+        int i, j, num_arrays, nal_len_size;
+
+        *is_nalff = 1;
+
+        bytestream2_skip(&gb, 21); /* the two bytes read next are bytes 21 and 22 of the record */
+        nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
+        num_arrays   = bytestream2_get_byte(&gb);
+
+        /* nal units in the hvcC always have length coded with 2 bytes,
+         * so put a fake nal_length_size = 2 while parsing them */
+        *nal_length_size = 2;
+
+        /* Decode nal units from hvcC. */
+        for (i = 0; i < num_arrays; i++) {
+            int type = bytestream2_get_byte(&gb) & 0x3f; /* only used in the error message below */
+            int cnt  = bytestream2_get_be16(&gb);
+
+            for (j = 0; j < cnt; j++) {
+                // +2 for the nal size field
+                int nalsize = bytestream2_peek_be16(&gb) + 2;
+                if (bytestream2_get_bytes_left(&gb) < nalsize) {
+                    av_log(logctx, AV_LOG_ERROR,
+                           "Invalid NAL unit size in extradata.\n");
+                    return AVERROR_INVALIDDATA; /* note: *nal_length_size is still the temporary 2 on this path */
+                }
+
+                /* the size field is passed along with the payload and parsed as a 2-byte length prefix */
+                ret = hevc_decode_nal_units(gb.buffer, nalsize, ps, sei, *is_nalff,
+                                            *nal_length_size, err_recognition, apply_defdispwin,
+                                            logctx);
+                if (ret < 0) {
+                    av_log(logctx, AV_LOG_ERROR,
+                           "Decoding nal unit %d %d from hvcC failed\n",
+                           type, i);
+                    return ret;
+                }
+                bytestream2_skip(&gb, nalsize);
+            }
+        }
+
+        /* Now store right nal length size, that will be used to parse
+         * all other nals */
+        *nal_length_size = nal_len_size;
+    } else {
+        *is_nalff = 0; /* not hvcC: the whole buffer goes to the NAL splitter; *nal_length_size is read but not modified */
+        ret = hevc_decode_nal_units(data, size, ps, sei, *is_nalff, *nal_length_size,
+                                    err_recognition, apply_defdispwin, logctx);
+        if (ret < 0)
+            return ret;
+    }
+
+    return ret;
+}
diff --git a/libavcodec/hevc/parse.h b/libavcodec/hevc/parse.h
new file mode 100644
index 0000000000..b3bcbde500
--- /dev/null
+++ b/libavcodec/hevc/parse.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.265 parser code
+ */
+
+#ifndef AVCODEC_HEVC_PARSE_H
+#define AVCODEC_HEVC_PARSE_H
+
+#include <stdint.h>
+
+#include "ps.h"
+#include "sei.h"
+
+/**
+ * Parse HEVC extradata and decode the NAL units it contains.
+ *
+ * @param data             extradata buffer
+ * @param size             size of data in bytes
+ * @param ps               parameter set storage handed to the NAL decoding
+ * @param sei              SEI state handed to the NAL decoding
+ * @param is_nalff         written by the function; set to 0 when the extradata
+ *                         is not handled as hvcC
+ * @param nal_length_size  for hvcC extradata, receives the NAL length field
+ *                         size read from the hvcC header
+ * @param err_recognition  forwarded to the NAL decoding
+ * @param apply_defdispwin forwarded to the NAL decoding
+ * @param logctx           context used for av_log() messages
+ * @return a negative AVERROR code on failure (e.g. AVERROR_INVALIDDATA for an
+ *         invalid NAL unit size), otherwise the non-negative result of the
+ *         NAL decoding
+ */
+int ff_hevc_decode_extradata(const uint8_t *data, int size, HEVCParamSets *ps,
+                             HEVCSEI *sei, int *is_nalff, int *nal_length_size,
+                             int err_recognition, int apply_defdispwin, void *logctx);
+
+#endif /* AVCODEC_HEVC_PARSE_H */
diff --git a/libavcodec/hevc/parser.c b/libavcodec/hevc/parser.c
new file mode 100644
index 0000000000..056e1b4aa4
--- /dev/null
+++ b/libavcodec/hevc/parser.c
@@ -0,0 +1,359 @@
+/*
+ * HEVC Annex B format parser
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/mem.h"
+
+#include "golomb.h"
+#include "hevc.h"
+#include "parse.h"
+#include "ps.h"
+#include "sei.h"
+#include "h2645_parse.h"
+#include "parser.h"
+
+#define START_CODE 0x000001 ///< start_code_prefix_one_3bytes
+
+/*
+ * NAL type predicates. The argument is a pointer to a structure with a
+ * 'type' member; it is parenthesized so that expression arguments
+ * (e.g. "nals + i") expand correctly. The argument is evaluated twice, so it
+ * must not have side effects.
+ */
+/** True for NAL unit types 16..23. */
+#define IS_IRAP_NAL(nal) ((nal)->type >= 16 && (nal)->type <= 23)
+/** True for the two IDR NAL unit types. */
+#define IS_IDR_NAL(nal) ((nal)->type == HEVC_NAL_IDR_W_RADL || (nal)->type == HEVC_NAL_IDR_N_LP)
+
+/** Private state of the HEVC parser (AVCodecParserContext.priv_data). */
+typedef struct HEVCParserContext {
+    ParseContext pc;      ///< frame-splitting state (state64, frame_start_found, buffer)
+
+    H2645Packet pkt;      ///< NAL units of the current picture, filled by ff_h2645_packet_split()
+    HEVCParamSets ps;     ///< VPS/SPS/PPS seen so far and the currently active ones
+    HEVCSEI sei;          ///< SEI state; reset at the start of every parse_nal_units() call
+
+    int is_avc;           ///< written by ff_hevc_decode_extradata() (its is_nalff output), forwarded to the packet splitter
+    int nal_length_size;  ///< written by ff_hevc_decode_extradata(), forwarded to the packet splitter
+    int parsed_extradata; ///< set once the extradata has been handed to ff_hevc_decode_extradata()
+
+    int poc;              ///< picture order count of the most recently parsed slice
+    int pocTid0;          ///< POC of the last temporal_id 0 picture that is not RASL, RADL or a *_N type; input to ff_hevc_compute_poc()
+} HEVCParserContext;
+
+/**
+ * Parse the beginning of a slice segment header and export stream and
+ * picture properties to the parser and codec contexts.
+ *
+ * The PPS referenced by the slice, its SPS and that SPS's VPS are made the
+ * active parameter sets; dimensions, pixel format, profile/level, frame rate,
+ * picture type and picture order count are then derived from them and from
+ * the header fields.
+ *
+ * @param s     parser context; s->priv_data is the HEVCParserContext
+ * @param nal   slice NAL unit; bits are consumed from nal->gb
+ * @param avctx codec context receiving profile, level and framerate, also
+ *              used for logging
+ * @return 1 when the header was parsed up to the picture order count,
+ *         0 for a dependent slice segment (nothing further is read),
+ *         AVERROR_INVALIDDATA for an unknown PPS/SPS id, an out-of-range
+ *         slice segment address or an unknown slice type
+ */
+static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal,
+                                   AVCodecContext *avctx)
+{
+    HEVCParserContext *ctx = s->priv_data;
+    HEVCParamSets *ps = &ctx->ps;
+    HEVCSEI *sei = &ctx->sei;
+    GetBitContext *gb = &nal->gb;
+    const HEVCWindow *ow;
+    int i, num = 0, den = 0;
+
+    unsigned int pps_id, first_slice_in_pic_flag, dependent_slice_segment_flag;
+    enum HEVCSliceType slice_type;
+
+    first_slice_in_pic_flag = get_bits1(gb);
+    // both fields are taken verbatim from the picture timing SEI
+    s->picture_structure = sei->picture_timing.picture_struct;
+    s->field_order = sei->picture_timing.picture_struct;
+
+    if (IS_IRAP_NAL(nal)) {
+        s->key_frame = 1;
+        skip_bits1(gb); // no_output_of_prior_pics_flag
+    }
+
+    // the PPS must already have been received
+    pps_id = get_ue_golomb(gb);
+    if (pps_id >= HEVC_MAX_PPS_COUNT || !ps->pps_list[pps_id]) {
+        av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
+        return AVERROR_INVALIDDATA;
+    }
+    ps->pps = ps->pps_list[pps_id];
+
+    if (ps->pps->sps_id >= HEVC_MAX_SPS_COUNT || !ps->sps_list[ps->pps->sps_id]) {
+        av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", ps->pps->sps_id);
+        return AVERROR_INVALIDDATA;
+    }
+    // switch the active SPS/VPS only when the PPS points to a different SPS
+    if (ps->sps != ps->sps_list[ps->pps->sps_id]) {
+        ps->sps  = ps->sps_list[ps->pps->sps_id];
+        ps->vps  = ps->vps_list[ps->sps->vps_id];
+    }
+    ow  = &ps->sps->output_window;
+
+    // display size is the coded size minus the output window offsets
+    s->coded_width  = ps->sps->width;
+    s->coded_height = ps->sps->height;
+    s->width        = ps->sps->width  - ow->left_offset - ow->right_offset;
+    s->height       = ps->sps->height - ow->top_offset  - ow->bottom_offset;
+    s->format       = ps->sps->pix_fmt;
+    avctx->profile  = ps->sps->ptl.general_ptl.profile_idc;
+    avctx->level    = ps->sps->ptl.general_ptl.level_idc;
+
+    // VPS timing info takes precedence over the SPS VUI timing info
+    if (ps->vps->vps_timing_info_present_flag) {
+        num = ps->vps->vps_num_units_in_tick;
+        den = ps->vps->vps_time_scale;
+    } else if (ps->sps->vui.vui_timing_info_present_flag) {
+        num = ps->sps->vui.vui_num_units_in_tick;
+        den = ps->sps->vui.vui_time_scale;
+    }
+
+    // num/den is units_in_tick/time_scale; the reduced pair is stored
+    // swapped (into framerate.den / framerate.num)
+    if (num != 0 && den != 0)
+        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
+                  num, den, 1 << 30);
+
+    if (!first_slice_in_pic_flag) {
+        unsigned int slice_segment_addr;
+        int slice_address_length;
+
+        if (ps->pps->dependent_slice_segments_enabled_flag)
+            dependent_slice_segment_flag = get_bits1(gb);
+        else
+            dependent_slice_segment_flag = 0;
+
+        // the address is coded with just enough bits to index every CTB
+        slice_address_length = av_ceil_log2_c(ps->sps->ctb_width *
+                                              ps->sps->ctb_height);
+        slice_segment_addr = get_bitsz(gb, slice_address_length);
+        if (slice_segment_addr >= ps->sps->ctb_width * ps->sps->ctb_height) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid slice segment address: %u.\n",
+                   slice_segment_addr);
+            return AVERROR_INVALIDDATA;
+        }
+    } else
+        dependent_slice_segment_flag = 0;
+
+    // nothing below is read for a dependent slice segment; returning 0
+    // lets the caller continue with the next NAL unit
+    if (dependent_slice_segment_flag)
+        return 0; /* break; */
+
+    for (i = 0; i < ps->pps->num_extra_slice_header_bits; i++)
+        skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
+
+    slice_type = get_ue_golomb_31(gb);
+    if (!(slice_type == HEVC_SLICE_I || slice_type == HEVC_SLICE_P ||
+          slice_type == HEVC_SLICE_B)) {
+        av_log(avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
+               slice_type);
+        return AVERROR_INVALIDDATA;
+    }
+    s->pict_type = slice_type == HEVC_SLICE_B ? AV_PICTURE_TYPE_B :
+                   slice_type == HEVC_SLICE_P ? AV_PICTURE_TYPE_P :
+                                                AV_PICTURE_TYPE_I;
+
+    if (ps->pps->output_flag_present_flag)
+        skip_bits1(gb); // pic_output_flag
+
+    if (ps->sps->separate_colour_plane)
+        skip_bits(gb, 2);   // colour_plane_id
+
+    // IDR pictures carry no POC LSB; their POC is 0
+    if (!IS_IDR_NAL(nal)) {
+        int pic_order_cnt_lsb = get_bits(gb, ps->sps->log2_max_poc_lsb);
+        s->output_picture_number = ctx->poc =
+            ff_hevc_compute_poc(ps->sps, ctx->pocTid0, pic_order_cnt_lsb, nal->type);
+    } else
+        s->output_picture_number = ctx->poc = 0;
+
+    // remember the POC as the reference for later POC computation, but only
+    // for temporal_id 0 pictures that are not RASL, RADL or a *_N type
+    if (nal->temporal_id == 0 &&
+        nal->type != HEVC_NAL_TRAIL_N &&
+        nal->type != HEVC_NAL_TSA_N &&
+        nal->type != HEVC_NAL_STSA_N &&
+        nal->type != HEVC_NAL_RADL_N &&
+        nal->type != HEVC_NAL_RASL_N &&
+        nal->type != HEVC_NAL_RADL_R &&
+        nal->type != HEVC_NAL_RASL_R)
+        ctx->pocTid0 = ctx->poc;
+
+    return 1; /* no need to evaluate the rest */
+}
+
+/**
+ * Walk the NAL units of one assembled picture and extract basic information.
+ *
+ * Parameter sets and SEI messages are passed to their decoders (their results
+ * are not checked). The walk ends at the first slice NAL unit for which
+ * hevc_parse_slice_header() returns a non-zero value. NAL units with
+ * nuh_layer_id > 0 are ignored.
+ *
+ * @param s parser context.
+ * @param buf buffer with field/frame data.
+ * @param buf_size size of the buffer.
+ * @param avctx codec context.
+ * @return the non-zero result of hevc_parse_slice_header(), a negative value
+ *         from ff_h2645_packet_split(), or -1 when no slice ended the walk.
+ */
+static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
+                           int buf_size, AVCodecContext *avctx)
+{
+    HEVCParserContext *ctx = s->priv_data;
+    HEVCParamSets *param_sets = &ctx->ps;
+    HEVCSEI *sei_state = &ctx->sei;
+    int status, n;
+
+    /* neutral defaults, in place until a slice header says otherwise */
+    s->key_frame         = 0;
+    s->pict_type         = AV_PICTURE_TYPE_I;
+    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
+
+    ff_hevc_reset_sei(sei_state);
+
+    status = ff_h2645_packet_split(&ctx->pkt, buf, buf_size, avctx, ctx->is_avc,
+                                   ctx->nal_length_size, AV_CODEC_ID_HEVC, 1, 0);
+    if (status < 0)
+        return status;
+
+    for (n = 0; n < ctx->pkt.nb_nals; n++) {
+        H2645NAL *nal = &ctx->pkt.nals[n];
+        GetBitContext *gb = &nal->gb;
+
+        /* only NAL units of layer 0 are inspected */
+        if (nal->nuh_layer_id > 0)
+            continue;
+
+        switch (nal->type) {
+        /* slice segments */
+        case HEVC_NAL_TRAIL_N:
+        case HEVC_NAL_TRAIL_R:
+        case HEVC_NAL_TSA_N:
+        case HEVC_NAL_TSA_R:
+        case HEVC_NAL_STSA_N:
+        case HEVC_NAL_STSA_R:
+        case HEVC_NAL_RADL_N:
+        case HEVC_NAL_RADL_R:
+        case HEVC_NAL_RASL_N:
+        case HEVC_NAL_RASL_R:
+        case HEVC_NAL_BLA_W_LP:
+        case HEVC_NAL_BLA_W_RADL:
+        case HEVC_NAL_BLA_N_LP:
+        case HEVC_NAL_IDR_W_RADL:
+        case HEVC_NAL_IDR_N_LP:
+        case HEVC_NAL_CRA_NUT:
+            /* frame doubling/tripling from the picture timing SEI */
+            switch (sei_state->picture_timing.picture_struct) {
+            case HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING:
+                s->repeat_pict = 1;
+                break;
+            case HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING:
+                s->repeat_pict = 2;
+                break;
+            default:
+                break;
+            }
+            status = hevc_parse_slice_header(s, nal, avctx);
+            if (status != 0)
+                return status;
+            break;
+        /* parameter sets */
+        case HEVC_NAL_VPS:
+            ff_hevc_decode_nal_vps(gb, avctx, param_sets);
+            break;
+        case HEVC_NAL_SPS:
+            ff_hevc_decode_nal_sps(gb, avctx, param_sets, 1);
+            break;
+        case HEVC_NAL_PPS:
+            ff_hevc_decode_nal_pps(gb, avctx, param_sets);
+            break;
+        /* supplemental enhancement information */
+        case HEVC_NAL_SEI_PREFIX:
+        case HEVC_NAL_SEI_SUFFIX:
+            ff_hevc_decode_nal_sei(gb, avctx, sei_state, param_sets, nal->type);
+            break;
+        default:
+            break;
+        }
+    }
+
+    /* no slice header ended the walk: didn't find a picture! */
+    av_log(avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
+    return -1;
+}
+
+/**
+ * Find the end of the current frame in the bitstream.
+ *
+ * The input is scanned byte by byte; the most recent bytes are kept in
+ * pc->state64 across calls, so a start code split over two buffers is still
+ * recognised. A decision is taken once the three-byte start code, the
+ * two-byte NAL unit header and the first payload byte have been shifted in.
+ *
+ * Only NAL units with nuh_layer_id == 0 are considered: parse_nal_units()
+ * ignores all other layers, and a parameter set, SEI or first slice segment
+ * of a higher layer must not cut the access unit in two.
+ *
+ * @param s        parser context; s->priv_data is the HEVCParserContext
+ * @param buf      input data
+ * @param buf_size size of buf in bytes
+ * @return the position of the first byte of the next frame, or END_NOT_FOUND.
+ *         The position is relative to buf and is negative when the start
+ *         code began in data passed to an earlier call.
+ */
+static int hevc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
+                               int buf_size)
+{
+    HEVCParserContext *ctx = s->priv_data;
+    ParseContext       *pc = &ctx->pc;
+    int i;
+
+    for (i = 0; i < buf_size; i++) {
+        int nut, layer_id;
+
+        pc->state64 = (pc->state64 << 8) | buf[i];
+
+        // start code must sit right before the 2-byte header and buf[i]
+        if (((pc->state64 >> 3 * 8) & 0xFFFFFF) != START_CODE)
+            continue;
+
+        // header layout: 1 bit forbidden_zero, 6 bits nal_unit_type,
+        // 6 bits nuh_layer_id, 3 bits nuh_temporal_id_plus1
+        nut      = (pc->state64 >> (2 * 8 + 1)) & 0x3F;
+        layer_id = (pc->state64 >> 11) & 0x3F;
+
+        // NAL units of other layers never start or end a frame here
+        if (layer_id > 0)
+            continue;
+
+        // Beginning of access unit
+        if ((nut >= HEVC_NAL_VPS && nut <= HEVC_NAL_EOB_NUT) || nut == HEVC_NAL_SEI_PREFIX ||
+            (nut >= 41 && nut <= 44) || (nut >= 48 && nut <= 55)) {
+            if (pc->frame_start_found) {
+                pc->frame_start_found = 0;
+                // a zero byte before the start code belongs to it, so the
+                // boundary is one byte earlier
+                if (!((pc->state64 >> 6 * 8) & 0xFF))
+                    return i - 6;
+                return i - 5;
+            }
+        } else if (nut <= HEVC_NAL_RASL_R ||
+                   (nut >= HEVC_NAL_BLA_W_LP && nut <= HEVC_NAL_CRA_NUT)) {
+            // first payload bit of a slice segment
+            int first_slice_segment_in_pic_flag = buf[i] >> 7;
+            if (first_slice_segment_in_pic_flag) {
+                if (!pc->frame_start_found) {
+                    pc->frame_start_found = 1;
+                } else { // First slice of next frame found
+                    pc->frame_start_found = 0;
+                    if (!((pc->state64 >> 6 * 8) & 0xFF))
+                        return i - 6;
+                    return i - 5;
+                }
+            }
+        }
+    }
+
+    return END_NOT_FOUND;
+}
+
+/**
+ * Parser callback: split the input into frames and parse each complete one.
+ *
+ * Extradata, if present, is decoded once on the first call. Unless the caller
+ * signals complete frames, the frame end is searched and the data is run
+ * through ff_combine_frame(). NAL units are parsed unless the call was made
+ * with an empty buffer and the buffer pointer was left untouched.
+ *
+ * @param s            parser context
+ * @param avctx        codec context
+ * @param poutbuf      receives the frame data, or NULL if none is ready
+ * @param poutbuf_size receives the frame size, or 0 if none is ready
+ * @param buf          input data
+ * @param buf_size     size of the input data
+ * @return the frame end position computed for this input, or buf_size when
+ *         ff_combine_frame() reports that no frame is available yet
+ */
+static int hevc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                      const uint8_t **poutbuf, int *poutbuf_size,
+                      const uint8_t *buf, int buf_size)
+{
+    HEVCParserContext *ctx = s->priv_data;
+    ParseContext *pc = &ctx->pc;
+    const uint8_t *const input_buf = buf;
+    const int input_empty = !buf_size;
+    int next;
+
+    /* one-time extradata parsing; its result is deliberately not checked */
+    if (avctx->extradata && !ctx->parsed_extradata) {
+        ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, &ctx->ps, &ctx->sei,
+                                 &ctx->is_avc, &ctx->nal_length_size, avctx->err_recognition,
+                                 1, avctx);
+        ctx->parsed_extradata = 1;
+    }
+
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
+        next = hevc_find_frame_end(s, buf, buf_size);
+        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+            *poutbuf_size = 0;
+            *poutbuf      = NULL;
+            return buf_size;
+        }
+    }
+
+    /* an empty input that was not replaced by buffered data has nothing to parse */
+    if (!(input_empty && input_buf == buf))
+        parse_nal_units(s, buf, buf_size, avctx);
+
+    *poutbuf_size = buf_size;
+    *poutbuf      = buf;
+    return next;
+}
+
+/**
+ * Parser close callback: release everything owned by the private context
+ * (parameter sets, split packet, SEI state and the frame-combining buffer).
+ */
+static void hevc_parser_close(AVCodecParserContext *s)
+{
+    HEVCParserContext *const priv = s->priv_data;
+
+    ff_hevc_ps_uninit(&priv->ps);
+    ff_h2645_packet_uninit(&priv->pkt);
+    ff_hevc_reset_sei(&priv->sei);
+
+    av_freep(&priv->pc.buffer);
+}
+
+/**
+ * HEVC parser definition: handles AV_CODEC_ID_HEVC with hevc_parse() and
+ * hevc_parser_close(), using a HEVCParserContext as private data.
+ */
+const AVCodecParser ff_hevc_parser = {
+    .codec_ids      = { AV_CODEC_ID_HEVC },
+    .priv_data_size = sizeof(HEVCParserContext),
+    .parser_parse   = hevc_parse,
+    .parser_close   = hevc_parser_close,
+};
diff --git a/libavcodec/hevc/pred.c b/libavcodec/hevc/pred.c
new file mode 100644
index 0000000000..8d588382fa
--- /dev/null
+++ b/libavcodec/hevc/pred.c
@@ -0,0 +1,81 @@
+/*
+ * HEVC video Decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hevcdec.h"
+
+#include "pred.h"
+
+#define BIT_DEPTH 8
+#include "pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 12
+#include "pred_template.c"
+#undef BIT_DEPTH
+
+/**
+ * Fill the prediction function table for the given bit depth.
+ *
+ * Bit depths 9, 10 and 12 select their dedicated template instances; every
+ * other value selects the 8-bit instances. On MIPS builds the table is then
+ * passed to ff_hevc_pred_init_mips().
+ */
+void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
+{
+#undef FUNC
+#define FUNC(a, depth) a ## _ ## depth
+
+/* assign every table entry from the template instance for one bit depth */
+#define HEVC_PRED(depth)                                \
+    hpc->pred_dc         = FUNC(pred_dc, depth);        \
+    hpc->intra_pred[0]   = FUNC(intra_pred_2, depth);   \
+    hpc->intra_pred[1]   = FUNC(intra_pred_3, depth);   \
+    hpc->intra_pred[2]   = FUNC(intra_pred_4, depth);   \
+    hpc->intra_pred[3]   = FUNC(intra_pred_5, depth);   \
+    hpc->pred_planar[0]  = FUNC(pred_planar_0, depth);  \
+    hpc->pred_planar[1]  = FUNC(pred_planar_1, depth);  \
+    hpc->pred_planar[2]  = FUNC(pred_planar_2, depth);  \
+    hpc->pred_planar[3]  = FUNC(pred_planar_3, depth);  \
+    hpc->pred_angular[0] = FUNC(pred_angular_0, depth); \
+    hpc->pred_angular[1] = FUNC(pred_angular_1, depth); \
+    hpc->pred_angular[2] = FUNC(pred_angular_2, depth); \
+    hpc->pred_angular[3] = FUNC(pred_angular_3, depth);
+
+    if (bit_depth == 9) {
+        HEVC_PRED(9);
+    } else if (bit_depth == 10) {
+        HEVC_PRED(10);
+    } else if (bit_depth == 12) {
+        HEVC_PRED(12);
+    } else {
+        /* 8 bit, and the fallback for any other value */
+        HEVC_PRED(8);
+    }
+
+#if ARCH_MIPS
+    ff_hevc_pred_init_mips(hpc, bit_depth);
+#endif
+}
diff --git a/libavcodec/hevc/pred.h b/libavcodec/hevc/pred.h
new file mode 100644
index 0000000000..b60d8176ae
--- /dev/null
+++ b/libavcodec/hevc/pred.h
@@ -0,0 +1,46 @@
+/*
+ * HEVC video Decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVC_PRED_H
+#define AVCODEC_HEVC_PRED_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct HEVCLocalContext;
+
+/** Table of intra prediction functions, filled in by ff_hevc_pred_init(). */
+typedef struct HEVCPredContext {
+    /// Intra prediction of one block at (x0, y0) of component c_idx;
+    /// entries 0..3 are set to the intra_pred_2 .. intra_pred_5 variants.
+    void (*intra_pred[4])(struct HEVCLocalContext *lc, int x0, int y0, int c_idx);
+
+    /// Planar prediction from the top/left reference samples into src;
+    /// the caller in pred_template.c selects the entry with log2_size - 2.
+    void (*pred_planar[4])(uint8_t *src, const uint8_t *top,
+                           const uint8_t *left, ptrdiff_t stride);
+    /// DC prediction; a single function that receives log2_size.
+    void (*pred_dc)(uint8_t *src, const uint8_t *top, const uint8_t *left,
+                    ptrdiff_t stride, int log2_size, int c_idx);
+    /// Angular prediction for the given mode; the caller in pred_template.c
+    /// selects the entry with log2_size - 2.
+    void (*pred_angular[4])(uint8_t *src, const uint8_t *top,
+                            const uint8_t *left, ptrdiff_t stride,
+                            int c_idx, int mode);
+} HEVCPredContext;
+
+/**
+ * Fill hpc with the prediction functions for the given bit depth.
+ * Depths 9, 10 and 12 select dedicated variants; any other value selects the
+ * 8-bit ones.
+ */
+void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
+/** MIPS-specific initialisation, called by ff_hevc_pred_init() when ARCH_MIPS is set. */
+void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);
+
+#endif /* AVCODEC_HEVC_PRED_H */
diff --git a/libavcodec/hevc/pred_template.c b/libavcodec/hevc/pred_template.c
new file mode 100644
index 0000000000..fe9a22614a
--- /dev/null
+++ b/libavcodec/hevc/pred_template.c
@@ -0,0 +1,549 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/pixdesc.h"
+
+#include "bit_depth_template.c"
+#include "pred.h"
+
+#define POS(x, y) src[(x) + stride * (y)]
+
+/**
+ * Intra-predict one block of size (1 << log2_size) for component c_idx at
+ * luma position (x0, y0).
+ *
+ * The function gathers the neighbouring samples of the current frame into
+ * local left/top arrays (index -1 is the top-left corner), fills in the
+ * samples that are not available, optionally smooths the reference samples
+ * and finally calls the planar, DC or angular predictor from s->hpc
+ * according to the intra prediction mode stored in lc->tu.
+ *
+ * @param lc        local context; provides the parent HEVCContext, the
+ *                  neighbour availability flags (lc->na) and the mode (lc->tu)
+ * @param x0        horizontal position, shifted by hshift to get the
+ *                  position within the component plane
+ * @param y0        vertical position, shifted by vshift likewise
+ * @param log2_size log2 of the block size in samples of the component
+ * @param c_idx     component index; 0 selects lc->tu.intra_pred_mode,
+ *                  other values lc->tu.intra_pred_mode_c
+ */
+static av_always_inline void FUNC(intra_pred)(HEVCLocalContext *lc, int x0, int y0,
+                                              int log2_size, int c_idx)
+{
+#define PU(x) \
+    ((x) >> s->ps.sps->log2_min_pu_size)
+#define MVF(x, y) \
+    (s->cur_frame->tab_mvf[(x) + (y) * min_pu_width])
+#define MVF_PU(x, y) \
+    MVF(PU(x0 + ((x) * (1 << hshift))), PU(y0 + ((y) * (1 << vshift))))
+#define IS_INTRA(x, y) \
+    (MVF_PU(x, y).pred_flag == PF_INTRA)
+#define MIN_TB_ADDR_ZS(x, y) \
+    s->ps.pps->min_tb_addr_zs[(y) * (s->ps.sps->tb_mask+2) + (x)]
+#define EXTEND(ptr, val, len)         \
+do {                                  \
+    pixel4 pix = PIXEL_SPLAT_X4(val); \
+    for (i = 0; i < (len); i += 4)    \
+        AV_WN4P(ptr + i, pix);        \
+} while (0)
+
+#define EXTEND_RIGHT_CIP(ptr, start, length)                                   \
+        for (i = start; i < (start) + (length); i += 4)                        \
+            if (!IS_INTRA(i, -1))                                              \
+                AV_WN4P(&ptr[i], a);                                           \
+            else                                                               \
+                a = PIXEL_SPLAT_X4(ptr[i+3])
+#define EXTEND_LEFT_CIP(ptr, start, length) \
+        for (i = start; i > (start) - (length); i--) \
+            if (!IS_INTRA(i - 1, -1)) \
+                ptr[i - 1] = ptr[i]
+#define EXTEND_UP_CIP(ptr, start, length)                                      \
+        for (i = (start); i > (start) - (length); i -= 4)                      \
+            if (!IS_INTRA(-1, i - 3))                                          \
+                AV_WN4P(&ptr[i - 3], a);                                       \
+            else                                                               \
+                a = PIXEL_SPLAT_X4(ptr[i - 3])
+#define EXTEND_DOWN_CIP(ptr, start, length)                                    \
+        for (i = start; i < (start) + (length); i += 4)                        \
+            if (!IS_INTRA(-1, i))                                              \
+                AV_WN4P(&ptr[i], a);                                           \
+            else                                                               \
+                a = PIXEL_SPLAT_X4(ptr[i + 3])
+
+    const HEVCContext *const s = lc->parent;
+    int i;
+    int hshift = s->ps.sps->hshift[c_idx];
+    int vshift = s->ps.sps->vshift[c_idx];
+    int size = (1 << log2_size);
+    int size_in_luma_h = size << hshift;
+    int size_in_tbs_h  = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
+    int size_in_luma_v = size << vshift;
+    int size_in_tbs_v  = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
+    int x = x0 >> hshift;
+    int y = y0 >> vshift;
+    int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+    int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+    int spin = c_idx && !size_in_tbs_v && ((2 * y0) & (1 << s->ps.sps->log2_min_tb_size));
+
+    int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
+
+    // stride and src are expressed in pixels of the current bit depth
+    ptrdiff_t stride = s->cur_frame->f->linesize[c_idx] / sizeof(pixel);
+    pixel *src = (pixel*)s->cur_frame->f->data[c_idx] + x + y * stride;
+
+    int min_pu_width = s->ps.sps->min_pu_width;
+
+    enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
+                              lc->tu.intra_pred_mode;
+    pixel4 a;
+    pixel  left_array[2 * MAX_TB_SIZE + 1];
+    pixel  filtered_left_array[2 * MAX_TB_SIZE + 1];
+    pixel  top_array[2 * MAX_TB_SIZE + 1];
+    pixel  filtered_top_array[2 * MAX_TB_SIZE + 1];
+
+    // the pointers are offset by one so that index -1 (the corner sample)
+    // is a valid element of the arrays
+    pixel  *left          = left_array + 1;
+    pixel  *top           = top_array  + 1;
+    pixel  *filtered_left = filtered_left_array + 1;
+    pixel  *filtered_top  = filtered_top_array  + 1;
+    // bottom-left and up-right additionally require that the neighbouring
+    // TB has a smaller min_tb_addr_zs entry than the current one
+    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v + spin) & s->ps.sps->tb_mask);
+    int cand_left        = lc->na.cand_left;
+    int cand_up_left     = lc->na.cand_up_left;
+    int cand_up          = lc->na.cand_up;
+    int cand_up_right    = lc->na.cand_up_right && !spin && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask, y_tb - 1);
+
+    // number of bottom-left / top-right samples inside the picture
+    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->ps.sps->height) -
+                           (y0 + size_in_luma_v)) >> vshift;
+    int top_right_size   = (FFMIN(x0 + 2 * size_in_luma_h, s->ps.sps->width) -
+                           (x0 + size_in_luma_h)) >> hshift;
+
+    // Constrained intra prediction: re-derive the availability flags from
+    // the pred_flag of the neighbouring PUs (PF_INTRA counts as available)
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {
+        int size_in_luma_pu_v = PU(size_in_luma_v);
+        int size_in_luma_pu_h = PU(size_in_luma_h);
+        int on_pu_edge_x    = !av_mod_uintp2(x0, s->ps.sps->log2_min_pu_size);
+        int on_pu_edge_y    = !av_mod_uintp2(y0, s->ps.sps->log2_min_pu_size);
+        if (!size_in_luma_pu_h)
+            size_in_luma_pu_h++;
+        if (cand_bottom_left == 1 && on_pu_edge_x) {
+            int x_left_pu   = PU(x0 - 1);
+            int y_bottom_pu = PU(y0 + size_in_luma_v);
+            int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_bottom_pu);
+            cand_bottom_left = 0;
+            for (i = 0; i < max; i += 2)
+                cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
+        }
+        if (cand_left == 1 && on_pu_edge_x) {
+            int x_left_pu   = PU(x0 - 1);
+            int y_left_pu   = PU(y0);
+            int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_left_pu);
+            cand_left = 0;
+            for (i = 0; i < max; i += 2)
+                cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
+        }
+        if (cand_up_left == 1) {
+            int x_left_pu   = PU(x0 - 1);
+            int y_top_pu    = PU(y0 - 1);
+            cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA;
+        }
+        if (cand_up == 1 && on_pu_edge_y) {
+            int x_top_pu    = PU(x0);
+            int y_top_pu    = PU(y0 - 1);
+            int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_top_pu);
+            cand_up = 0;
+            for (i = 0; i < max; i += 2)
+                cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
+        }
+        if (cand_up_right == 1 && on_pu_edge_y) {
+            int y_top_pu    = PU(y0 - 1);
+            int x_right_pu  = PU(x0 + size_in_luma_h);
+            int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_right_pu);
+            cand_up_right = 0;
+            for (i = 0; i < max; i += 2)
+                cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
+        }
+        memset(left, 128, 2 * MAX_TB_SIZE*sizeof(pixel));
+        memset(top , 128, 2 * MAX_TB_SIZE*sizeof(pixel));
+        top[-1] = 128;
+    }
+    // Copy the available neighbouring samples out of the frame
+    if (cand_up_left) {
+        left[-1] = POS(-1, -1);
+        top[-1]  = left[-1];
+    }
+    if (cand_up)
+        memcpy(top, src - stride, size * sizeof(pixel));
+    if (cand_up_right) {
+        memcpy(top + size, src - stride + size, size * sizeof(pixel));
+        // beyond the picture edge, repeat the last sample inside it
+        EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1),
+               size - top_right_size);
+    }
+    if (cand_left)
+        for (i = 0; i < size; i++)
+            left[i] = POS(-1, i);
+    if (cand_bottom_left) {
+        for (i = size; i < size + bottom_left_size; i++)
+            left[i] = POS(-1, i);
+        EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
+               size - bottom_left_size);
+    }
+
+    // Constrained intra prediction: overwrite the samples whose PU is not
+    // intra-coded with values propagated from intra-coded neighbours
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {
+        if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
+            int size_max_x = x0 + ((2 * size) << hshift) < s->ps.sps->width ?
+                                    2 * size : (s->ps.sps->width - x0) >> hshift;
+            int size_max_y = y0 + ((2 * size) << vshift) < s->ps.sps->height ?
+                                    2 * size : (s->ps.sps->height - y0) >> vshift;
+            int j = size + (cand_bottom_left? bottom_left_size: 0) -1;
+            if (!cand_up_right) {
+                size_max_x = x0 + ((size) << hshift) < s->ps.sps->width ?
+                                                    size : (s->ps.sps->width - x0) >> hshift;
+            }
+            if (!cand_bottom_left) {
+                size_max_y = y0 + (( size) << vshift) < s->ps.sps->height ?
+                                                     size : (s->ps.sps->height - y0) >> vshift;
+            }
+            if (cand_bottom_left || cand_left || cand_up_left) {
+                while (j > -1 && !IS_INTRA(-1, j))
+                    j--;
+                if (!IS_INTRA(-1, j)) {
+                    j = 0;
+                    while (j < size_max_x && !IS_INTRA(j, -1))
+                        j++;
+                    EXTEND_LEFT_CIP(top, j, j + 1);
+                    left[-1] = top[-1];
+                }
+            } else {
+                j = 0;
+                while (j < size_max_x && !IS_INTRA(j, -1))
+                    j++;
+                if (j > 0) {
+                    EXTEND_LEFT_CIP(top, j, j);
+                    top[-1] = top[0];
+                }
+                left[-1] = top[-1];
+            }
+            left[-1] = top[-1];
+            if (cand_bottom_left || cand_left) {
+                a = PIXEL_SPLAT_X4(left[-1]);
+                EXTEND_DOWN_CIP(left, 0, size_max_y);
+            }
+            if (!cand_left)
+                EXTEND(left, left[-1], size);
+            if (!cand_bottom_left)
+                EXTEND(left + size, left[size - 1], size);
+            if (x0 != 0 && y0 != 0) {
+                a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
+                EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
+                if (!IS_INTRA(-1, - 1))
+                    left[-1] = left[0];
+            } else if (x0 == 0) {
+                EXTEND(left, 0, size_max_y);
+            } else {
+                a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
+                EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
+            }
+            top[-1] = left[-1];
+            if (y0 != 0) {
+                a = PIXEL_SPLAT_X4(left[-1]);
+                EXTEND_RIGHT_CIP(top, 0, size_max_x);
+            }
+        }
+    }
+    // Infer the unavailable samples
+    if (!cand_bottom_left) {
+        if (cand_left) {
+            EXTEND(left + size, left[size - 1], size);
+        } else if (cand_up_left) {
+            EXTEND(left, left[-1], 2 * size);
+            cand_left = 1;
+        } else if (cand_up) {
+            left[-1] = top[0];
+            EXTEND(left, left[-1], 2 * size);
+            cand_up_left = 1;
+            cand_left    = 1;
+        } else if (cand_up_right) {
+            EXTEND(top, top[size], size);
+            left[-1] = top[size];
+            EXTEND(left, left[-1], 2 * size);
+            cand_up      = 1;
+            cand_up_left = 1;
+            cand_left    = 1;
+        } else { // No samples available
+            // mid-grey for the current bit depth
+            left[-1] = (1 << (BIT_DEPTH - 1));
+            EXTEND(top,  left[-1], 2 * size);
+            EXTEND(left, left[-1], 2 * size);
+        }
+    }
+
+    if (!cand_left)
+        EXTEND(left, left[size], size);
+    if (!cand_up_left) {
+        left[-1] = left[0];
+    }
+    if (!cand_up)
+        EXTEND(top, left[-1], size);
+    if (!cand_up_right)
+        EXTEND(top + size, top[size - 1], size);
+
+    top[-1] = left[-1];
+
+    // Filtering process
+    // (skipped for chroma unless chroma_format_idc is 3, for DC mode and
+    // for blocks of size 4)
+    if (!s->ps.sps->intra_smoothing_disabled && (c_idx == 0  || s->ps.sps->chroma_format_idc == 3)) {
+        if (mode != INTRA_DC && size != 4){
+            // thresholds for log2_size 3, 4 and 5
+            int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
+            // distance of the mode from pure vertical (26) / horizontal (10)
+            int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)),
+                                          FFABS((int)(mode - 10U)));
+            if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
+                int threshold = 1 << (BIT_DEPTH - 5);
+                // strong smoothing: 32x32 luma whose references are nearly
+                // linear get a linear interpolation between the end samples
+                if (s->ps.sps->strong_intra_smoothing_enabled && c_idx == 0 &&
+                    log2_size == 5 &&
+                    FFABS(top[-1]  + top[63]  - 2 * top[31])  < threshold &&
+                    FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
+                    // We can't just overwrite values in top because it could be
+                    // a pointer into src
+                    filtered_top[-1] = top[-1];
+                    filtered_top[63] = top[63];
+                    for (i = 0; i < 63; i++)
+                        filtered_top[i] = ((64 - (i + 1)) * top[-1] +
+                                           (i + 1)  * top[63] + 32) >> 6;
+                    for (i = 0; i < 63; i++)
+                        left[i] = ((64 - (i + 1)) * left[-1] +
+                                   (i + 1)  * left[63] + 32) >> 6;
+                    top = filtered_top;
+                } else {
+                    // regular smoothing: [1 2 1] / 4 filter along both edges
+                    filtered_left[2 * size - 1] = left[2 * size - 1];
+                    filtered_top[2 * size - 1]  = top[2 * size - 1];
+                    for (i = 2 * size - 2; i >= 0; i--)
+                        filtered_left[i] = (left[i + 1] + 2 * left[i] +
+                                            left[i - 1] + 2) >> 2;
+                    filtered_top[-1]  =
+                    filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
+                    for (i = 2 * size - 2; i >= 0; i--)
+                        filtered_top[i] = (top[i + 1] + 2 * top[i] +
+                                           top[i - 1] + 2) >> 2;
+                    left = filtered_left;
+                    top  = filtered_top;
+                }
+            }
+        }
+    }
+
+    // Run the predictor for the selected mode on the prepared references
+    switch (mode) {
+    case INTRA_PLANAR:
+        s->hpc.pred_planar[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
+                                          (uint8_t *)left, stride);
+        break;
+    case INTRA_DC:
+        s->hpc.pred_dc((uint8_t *)src, (uint8_t *)top,
+                       (uint8_t *)left, stride, log2_size, c_idx);
+        break;
+    default:
+        s->hpc.pred_angular[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
+                                           (uint8_t *)left, stride, c_idx,
+                                           mode);
+        break;
+    }
+}
+
+#define INTRA_PRED(size)                                                            \
+static void FUNC(intra_pred_ ## size)(HEVCLocalContext *lc, int x0, int y0, int c_idx) \
+{                                                                                   \
+    FUNC(intra_pred)(lc, x0, y0, size, c_idx);                                      \
+} /* emits FUNC(intra_pred_<size>): a fixed-size entry point that forwards <size> as the 4th argument of FUNC(intra_pred) */
+
+INTRA_PRED(2) /* <size> is presumably log2 of the block width (2 -> 4x4) -- TODO confirm against intra_pred()'s signature */
+INTRA_PRED(3) /* FUNC(intra_pred_3) */
+INTRA_PRED(4) /* FUNC(intra_pred_4) */
+INTRA_PRED(5) /* FUNC(intra_pred_5) */
+
+#undef INTRA_PRED
+
+/**
+ * Planar intra prediction of a square block, (1 << trafo_size) samples per side.
+ *
+ * Every output sample is the rounded mean of two linear interpolations: a
+ * horizontal one between left[y] and top[size], and a vertical one between
+ * top[x] and left[size].
+ *
+ * @param _src       destination block, written through POS()
+ * @param _top       reference samples above the block; top[size] is read too
+ * @param _left      reference samples left of the block; left[size] is read too
+ * @param stride     not referenced directly here; presumably consumed by POS()
+ * @param trafo_size log2 of the block width
+ */
+static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top,
+                                  const uint8_t *_left, ptrdiff_t stride,
+                                  int trafo_size)
+{
+    pixel *src        = (pixel *)_src;
+    const pixel *top  = (const pixel *)_top;
+    const pixel *left = (const pixel *)_left;
+    const int size    = 1 << trafo_size;
+    const int shift   = trafo_size + 1;
+
+    for (int y = 0; y < size; y++) {
+        for (int x = 0; x < size; x++) {
+            /* weights of each pair sum to size, hence the final shift by log2(2 * size) */
+            const int horiz = (size - 1 - x) * left[y] + (x + 1) * top[size];
+            const int vert  = (size - 1 - y) * top[x]  + (y + 1) * left[size];
+
+            POS(x, y) = (horiz + vert + size) >> shift;
+        }
+    }
+}
+
+#define PRED_PLANAR(size)\
+static void FUNC(pred_planar_ ## size)(uint8_t *src, const uint8_t *top,        \
+                                       const uint8_t *left, ptrdiff_t stride)   \
+{                                                                               \
+    FUNC(pred_planar)(src, top, left, stride, size + 2);                        \
+} /* emits FUNC(pred_planar_<size>): calls FUNC(pred_planar) with trafo_size = <size> + 2 */
+
+PRED_PLANAR(0) /* trafo_size 2: 4x4 block */
+PRED_PLANAR(1) /* trafo_size 3: 8x8 block */
+PRED_PLANAR(2) /* trafo_size 4: 16x16 block */
+PRED_PLANAR(3) /* trafo_size 5: 32x32 block */
+
+#undef PRED_PLANAR
+
+/**
+ * DC intra prediction of a square block, (1 << log2_size) samples per side.
+ *
+ * The block is filled with the rounded mean of the first `size` samples of
+ * the top and left references. For c_idx == 0 and blocks narrower than 32,
+ * the first row and first column are then blended with the adjacent
+ * reference samples.
+ *
+ * @param _src      destination block, written through POS()
+ * @param _top      reference samples above the block
+ * @param _left     reference samples left of the block
+ * @param stride    not referenced directly here; presumably consumed by POS()
+ * @param log2_size log2 of the block width
+ * @param c_idx     colour component index; edge blending only runs for 0
+ */
+static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
+                          const uint8_t *_left,
+                          ptrdiff_t stride, int log2_size, int c_idx)
+{
+    pixel *src        = (pixel *)_src;
+    const pixel *top  = (const pixel *)_top;
+    const pixel *left = (const pixel *)_left;
+    const int size    = 1 << log2_size;
+    int dc            = size; /* rounding term: half of the 2 * size divisor */
+    pixel4 fill;
+
+    for (int k = 0; k < size; k++)
+        dc += left[k] + top[k];
+    dc >>= log2_size + 1;
+
+    /* flat fill, four samples per store */
+    fill = PIXEL_SPLAT_X4(dc);
+    for (int row = 0; row < size; row++)
+        for (int col = 0; col < size; col += 4)
+            AV_WN4P(&POS(col, row), fill);
+
+    if (c_idx != 0 || size >= 32)
+        return;
+
+    /* blend the block edges with the neighbouring reference samples */
+    POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;
+    for (int col = 1; col < size; col++)
+        POS(col, 0) = (top[col] + 3 * dc + 2) >> 2;
+    for (int row = 1; row < size; row++)
+        POS(0, row) = (left[row] + 3 * dc + 2) >> 2;
+}
+
+static av_always_inline void FUNC(pred_angular)(uint8_t *_src, // angular intra prediction of one size x size block into _src
+                                                const uint8_t *_top, // reference samples above the block; top[-1] is read as well
+                                                const uint8_t *_left, // reference samples left of the block; left[-1] is read as well
+                                                ptrdiff_t stride, int c_idx, // c_idx == 0 enables the edge adjustment for modes 10 and 26
+                                                int mode, int size) // mode indexes intra_pred_angle[] as mode - 2; size is the block width in samples
+{
+    int x, y;
+    pixel *src        = (pixel *)_src;
+    const pixel *top  = (const pixel *)_top;
+    const pixel *left = (const pixel *)_left;
+
+    static const int intra_pred_angle[] = { // indexed by mode - 2; displacement per row/column in 1/32 sample units
+         32,  26,  21,  17, 13,  9,  5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32,
+        -26, -21, -17, -13, -9, -5, -2, 0, 2,  5,  9, 13,  17,  21,  26,  32
+    };
+    static const int inv_angle[] = { // indexed by mode - 11; used with >> 8 to project onto the other reference
+        -4096, -1638, -910, -630, -482, -390, -315, -256, -315, -390, -482,
+        -630, -910, -1638, -4096
+    };
+
+    int angle = intra_pred_angle[mode - 2];
+    pixel ref_array[3 * MAX_TB_SIZE + 4];
+    pixel *ref_tmp = ref_array + size; // offset by size so indices down to -size stay inside ref_array
+    const pixel *ref;
+    int last = (size * angle) >> 5; // lowest reference index reached by the last row/column (negative for negative angles)
+
+    if (mode >= 18) { // top samples are the main reference; output is produced row by row
+        ref = top - 1; // ref[0] is top[-1]
+        if (angle < 0 && last < -1) { // negative indices below -1 are needed: build an extended reference
+            for (x = 0; x <= size; x += 4) // copy top[-1] onwards into ref_tmp[0..], four samples at a time
+                AV_WN4P(&ref_tmp[x], AV_RN4P(&top[x - 1]));
+            for (x = last; x <= -1; x++) // fill the negative indices with samples projected from the left reference
+                ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
+            ref = ref_tmp;
+        }
+
+        for (y = 0; y < size; y++) {
+            int idx  = ((y + 1) * angle) >> 5; // whole-sample part of the displacement for this row
+            int fact = ((y + 1) * angle) & 31; // fractional part, in 1/32 sample
+            if (fact) { // interpolate between the two nearest reference samples (loop unrolled by 4)
+                for (x = 0; x < size; x += 4) {
+                    POS(x    , y) = ((32 - fact) * ref[x + idx + 1] +
+                                           fact  * ref[x + idx + 2] + 16) >> 5;
+                    POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] +
+                                           fact  * ref[x + 1 + idx + 2] + 16) >> 5;
+                    POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] +
+                                           fact  * ref[x + 2 + idx + 2] + 16) >> 5;
+                    POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] +
+                                           fact  * ref[x + 3 + idx + 2] + 16) >> 5;
+                }
+            } else { // whole-sample displacement: plain copy, four samples per store
+                for (x = 0; x < size; x += 4)
+                    AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1]));
+            }
+        }
+        if (mode == 26 && c_idx == 0 && size < 32) { // mode 26 has angle 0 (rows copy the top reference): adjust the first column
+            for (y = 0; y < size; y++)
+                POS(0, y) = av_clip_pixel(top[0] + ((left[y] - left[-1]) >> 1)); // add half the gradient along the left reference
+        }
+    } else { // left samples are the main reference; output is produced column by column
+        ref = left - 1; // ref[0] is left[-1]
+        if (angle < 0 && last < -1) { // same extended-reference construction with top and left swapped
+            for (x = 0; x <= size; x += 4)
+                AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1]));
+            for (x = last; x <= -1; x++)
+                ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
+            ref = ref_tmp;
+        }
+
+        for (x = 0; x < size; x++) {
+            int idx  = ((x + 1) * angle) >> 5; // whole-sample part of the displacement for this column
+            int fact = ((x + 1) * angle) & 31; // fractional part, in 1/32 sample
+            if (fact) {
+                for (y = 0; y < size; y++) {
+                    POS(x, y) = ((32 - fact) * ref[y + idx + 1] +
+                                       fact  * ref[y + idx + 2] + 16) >> 5;
+                }
+            } else {
+                for (y = 0; y < size; y++)
+                    POS(x, y) = ref[y + idx + 1];
+            }
+        }
+        if (mode == 10 && c_idx == 0 && size < 32) { // mode 10 has angle 0 (columns copy the left reference): adjust the first row
+            for (x = 0; x < size; x += 4) {
+                POS(x,     0) = av_clip_pixel(left[0] + ((top[x    ] - top[-1]) >> 1));
+                POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - top[-1]) >> 1));
+                POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - top[-1]) >> 1));
+                POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - top[-1]) >> 1));
+            }
+        }
+    }
+}
+
+/* Angular intra prediction entry point for 4x4 blocks (size = 1 << 2). */
+static void FUNC(pred_angular_0)(uint8_t *src, const uint8_t *top,
+                                 const uint8_t *left,
+                                 ptrdiff_t stride, int c_idx, int mode)
+{
+    enum { BLOCK_SIZE = 1 << 2 };
+
+    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, BLOCK_SIZE);
+}
+
+/* Angular intra prediction entry point for 8x8 blocks (size = 1 << 3). */
+static void FUNC(pred_angular_1)(uint8_t *src, const uint8_t *top,
+                                 const uint8_t *left,
+                                 ptrdiff_t stride, int c_idx, int mode)
+{
+    enum { BLOCK_SIZE = 1 << 3 };
+
+    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, BLOCK_SIZE);
+}
+
+/* Angular intra prediction entry point for 16x16 blocks (size = 1 << 4). */
+static void FUNC(pred_angular_2)(uint8_t *src, const uint8_t *top,
+                                 const uint8_t *left,
+                                 ptrdiff_t stride, int c_idx, int mode)
+{
+    enum { BLOCK_SIZE = 1 << 4 };
+
+    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, BLOCK_SIZE);
+}
+
+/* Angular intra prediction entry point for 32x32 blocks (size = 1 << 5). */
+static void FUNC(pred_angular_3)(uint8_t *src, const uint8_t *top,
+                                 const uint8_t *left,
+                                 ptrdiff_t stride, int c_idx, int mode)
+{
+    enum { BLOCK_SIZE = 1 << 5 };
+
+    FUNC(pred_angular)(src, top, left, stride, c_idx, mode, BLOCK_SIZE);
+}
+
+#undef EXTEND_LEFT_CIP
+#undef EXTEND_RIGHT_CIP
+#undef EXTEND_UP_CIP
+#undef EXTEND_DOWN_CIP
+#undef IS_INTRA
+#undef MVF_PU
+#undef MVF
+#undef PU
+#undef EXTEND
+#undef MIN_TB_ADDR_ZS
+#undef POS
diff --git a/libavcodec/hevc/ps.c b/libavcodec/hevc/ps.c
new file mode 100644
index 0000000000..2dd4f834a4
--- /dev/null
+++ b/libavcodec/hevc/ps.c
@@ -0,0 +1,2076 @@
+/*
+ * HEVC Parameter Set decoding
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2012 - 2013 Mickael Raulet
+ * Copyright (C) 2012 - 2013 Gildas Cocherel
+ * Copyright (C) 2013 Vittorio Giovara
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "golomb.h"
+#include "h2645_vui.h"
+#include "data.h"
+#include "ps.h"
+#include "refstruct.h"
+
+static const uint8_t default_scaling_list_intra[] = { // 64 entries, written as 8 rows of 8; presumably the default intra scaling matrix -- consumer not visible in this chunk
+    16, 16, 16, 16, 17, 18, 21, 24,
+    16, 16, 16, 16, 17, 19, 22, 25,
+    16, 16, 17, 18, 20, 22, 25, 29,
+    16, 16, 18, 21, 24, 27, 31, 36,
+    17, 17, 20, 24, 30, 35, 41, 47,
+    18, 19, 22, 27, 35, 44, 54, 65,
+    21, 22, 25, 31, 41, 54, 70, 88,
+    24, 25, 29, 36, 47, 65, 88, 115
+};
+
+static const uint8_t default_scaling_list_inter[] = { // 64 entries, written as 8 rows of 8; presumably the inter counterpart -- consumer not visible in this chunk
+    16, 16, 16, 16, 17, 18, 20, 24,
+    16, 16, 16, 17, 18, 20, 24, 25,
+    16, 16, 17, 18, 20, 24, 25, 28,
+    16, 17, 18, 20, 24, 25, 28, 33,
+    17, 18, 20, 24, 25, 28, 33, 41,
+    18, 20, 24, 25, 28, 33, 41, 54,
+    20, 24, 25, 28, 33, 41, 54, 71,
+    24, 25, 28, 33, 41, 54, 71, 91
+};
+
+static const uint8_t hevc_sub_width_c[] = { // indexed by sps->chroma_format_idc; decode_vui() uses it as the horizontal multiplier for display-window offsets
+    1, 2, 2, 1
+};
+
+static const uint8_t hevc_sub_height_c[] = { // indexed by sps->chroma_format_idc; decode_vui() uses it as the vertical multiplier for display-window offsets
+    1, 2, 1, 1
+};
+
+/**
+ * Drop the PPS held in slot @p id of s->pps_list.
+ *
+ * When that entry is also the active PPS (s->pps), the active pointer is
+ * cleared first so it does not keep referring to the entry being unreferenced.
+ */
+static void remove_pps(HEVCParamSets *s, int id)
+{
+    const int is_active = s->pps == s->pps_list[id];
+
+    if (is_active)
+        s->pps = NULL;
+    ff_refstruct_unref(&s->pps_list[id]);
+}
+
+/**
+ * Drop the SPS held in slot @p id of s->sps_list, together with every PPS
+ * whose sps_id names that slot. Does nothing when the slot is empty.
+ */
+static void remove_sps(HEVCParamSets *s, int id)
+{
+    if (!s->sps_list[id])
+        return;
+
+    if (s->sps == s->sps_list[id])
+        s->sps = NULL;
+
+    /* a PPS is unusable without the SPS it refers to */
+    for (int pps_id = 0; pps_id < FF_ARRAY_ELEMS(s->pps_list); pps_id++) {
+        if (!s->pps_list[pps_id] || s->pps_list[pps_id]->sps_id != id)
+            continue;
+        remove_pps(s, pps_id);
+    }
+
+    av_assert0(!(s->sps_list[id] && s->sps == s->sps_list[id]));
+    ff_refstruct_unref(&s->sps_list[id]);
+}
+
+/**
+ * Drop the VPS held in slot @p id of s->vps_list, together with every SPS
+ * whose vps_id names that slot (which in turn drops the PPS that depend on
+ * those SPS). Does nothing when the slot is empty.
+ */
+static void remove_vps(HEVCParamSets *s, int id)
+{
+    if (!s->vps_list[id])
+        return;
+
+    if (s->vps == s->vps_list[id])
+        s->vps = NULL;
+
+    for (int sps_id = 0; sps_id < FF_ARRAY_ELEMS(s->sps_list); sps_id++) {
+        if (!s->sps_list[sps_id] || s->sps_list[sps_id]->vps_id != id)
+            continue;
+        remove_sps(s, sps_id);
+    }
+
+    ff_refstruct_unref(&s->vps_list[id]);
+}
+
+int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, // parse one short-term reference picture set from gb into *rps; avctx is only used for logging
+                                  ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header) // returns 0 on success, AVERROR_INVALIDDATA on out-of-range syntax elements
+{
+    int delta_poc;
+    int k0 = 0; // number of negative delta POCs collected in the predicted path
+    int k  = 0; // number of delta POCs collected in the predicted path
+    int i;
+
+    rps->used        = 0;
+    rps->rps_predict = 0;
+
+    if (rps != sps->st_rps && sps->nb_st_rps) // the prediction flag is only coded when rps is not the first SPS entry and the SPS has at least one RPS
+        rps->rps_predict = get_bits1(gb);
+
+    if (rps->rps_predict) { // derive this set from a previously decoded reference set
+        const ShortTermRPS *rps_ridx;
+        uint8_t used[32] = { 0 };
+        int delta_rps;
+
+        if (is_slice_header) {
+            rps->delta_idx = get_ue_golomb_long(gb) + 1;
+            if (rps->delta_idx > sps->nb_st_rps) { // NOTE(review): the check reads the stored field; if that field is narrower than the parsed value it sees a truncated number -- field type not visible here, confirm
+                av_log(avctx, AV_LOG_ERROR,
+                       "Invalid value of delta_idx in slice header RPS: %d > %d.\n",
+                       rps->delta_idx, sps->nb_st_rps);
+                return AVERROR_INVALIDDATA;
+            }
+            rps_ridx = &sps->st_rps[sps->nb_st_rps - rps->delta_idx]; // counted back from the end of the SPS list
+            rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs;
+        } else
+            rps_ridx = &sps->st_rps[rps - sps->st_rps - 1]; // the entry immediately before rps; relies on rps pointing into sps->st_rps
+
+        rps->delta_rps_sign = get_bits1(gb);
+        rps->abs_delta_rps  = get_ue_golomb_long(gb) + 1;
+        if (rps->abs_delta_rps > 32768) { // NOTE(review): same caveat as delta_idx -- the comparison happens after the store into the field
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid value of abs_delta_rps: %d\n",
+                   rps->abs_delta_rps);
+            return AVERROR_INVALIDDATA;
+        }
+        delta_rps      = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps; // sign bit set -> negative
+        for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { // one iteration more than the reference has entries: the last one stands for delta_rps itself
+            used[k] = get_bits1(gb);
+
+            rps->use_delta = 0; // overwritten every iteration, so only the last value survives the loop
+            if (!used[k])
+                rps->use_delta = get_bits1(gb);
+
+            if (used[k] || rps->use_delta) { // this candidate is kept in the new set
+                if (i < rps_ridx->num_delta_pocs)
+                    delta_poc = delta_rps + rps_ridx->delta_poc[i];
+                else
+                    delta_poc = delta_rps;
+                rps->delta_poc[k] = delta_poc;
+                if (delta_poc < 0)
+                    k0++;
+                k++;
+            }
+        }
+
+        if (k >= FF_ARRAY_ELEMS(used)) { // validated only after the loop has finished writing used[] / rps->delta_poc[]
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid num_delta_pocs: %d\n", k);
+            return AVERROR_INVALIDDATA;
+        }
+
+        rps->num_delta_pocs    = k;
+        rps->num_negative_pics = k0;
+        // sort in increasing order (smallest first)
+        if (rps->num_delta_pocs != 0) {
+            int u, tmp;
+            for (i = 1; i < rps->num_delta_pocs; i++) { // insertion sort; used[] is permuted together with delta_poc[]
+                delta_poc = rps->delta_poc[i];
+                u         = used[i];
+                for (k = i - 1; k >= 0; k--) {
+                    tmp = rps->delta_poc[k];
+                    if (delta_poc < tmp) { // move the larger entry up one slot and drop the current value into its place
+                        rps->delta_poc[k + 1] = tmp;
+                        used[k + 1]           = used[k];
+                        rps->delta_poc[k]     = delta_poc;
+                        used[k]               = u;
+                    }
+                }
+            }
+        }
+        if ((rps->num_negative_pics >> 1) != 0) { // at least two negative entries: reverse that leading run
+            int u;
+            k = rps->num_negative_pics - 1;
+            // flip the negative values to largest first
+            for (i = 0; i < rps->num_negative_pics >> 1; i++) {
+                delta_poc         = rps->delta_poc[i];
+                u                 = used[i];
+                rps->delta_poc[i] = rps->delta_poc[k];
+                used[i]           = used[k];
+                rps->delta_poc[k] = delta_poc;
+                used[k]           = u;
+                k--;
+            }
+        }
+
+        for (unsigned i = 0; i < FF_ARRAY_ELEMS(used); i++) // pack the per-entry flags into the rps->used bitmask, bit i for entry i
+            rps->used |= (uint32_t)used[i] << i;
+    } else { // explicitly coded set: negative entries first, then positive ones
+        unsigned int nb_positive_pics;
+
+        rps->num_negative_pics = get_ue_golomb_long(gb);
+        nb_positive_pics       = get_ue_golomb_long(gb);
+
+        if (rps->num_negative_pics >= HEVC_MAX_REFS || // NOTE(review): num_negative_pics is checked after being stored in the struct field, unlike the local nb_positive_pics -- confirm the field is wide enough
+            nb_positive_pics >= HEVC_MAX_REFS) {
+            av_log(avctx, AV_LOG_ERROR, "Too many refs in a short term RPS.\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        rps->num_delta_pocs = rps->num_negative_pics + nb_positive_pics;
+        if (rps->num_delta_pocs) {
+            int prev = 0; // running POC offset; every coded value is a difference from the previous entry
+
+            for (i = 0; i < rps->num_negative_pics; i++) {
+                delta_poc = get_ue_golomb_long(gb) + 1;
+                if (delta_poc < 1 || delta_poc > 32768) { // the < 1 test also rejects values that wrapped in the + 1 or the conversion to int
+                    av_log(avctx, AV_LOG_ERROR,
+                        "Invalid value of delta_poc: %d\n",
+                        delta_poc);
+                    return AVERROR_INVALIDDATA;
+                }
+                prev -= delta_poc;
+                rps->delta_poc[i] = prev;
+                rps->used        |= get_bits1(gb) * (1 << i);
+            }
+            prev = 0; // positive entries accumulate from zero again
+            for (i = 0; i < nb_positive_pics; i++) {
+                delta_poc = get_ue_golomb_long(gb) + 1;
+                if (delta_poc < 1 || delta_poc > 32768) {
+                    av_log(avctx, AV_LOG_ERROR,
+                        "Invalid value of delta_poc: %d\n",
+                        delta_poc);
+                    return AVERROR_INVALIDDATA;
+                }
+                prev += delta_poc;
+                rps->delta_poc[rps->num_negative_pics + i] = prev;
+                rps->used                                 |= get_bits1(gb) * (1 << (rps->num_negative_pics + i));
+            }
+        }
+    }
+    return 0;
+}
+
+
+static int decode_profile_tier_level(GetBitContext *gb, AVCodecContext *avctx, // parse one fixed-size profile/tier block into *ptl; avctx is only used for logging
+                                      PTLCommon *ptl) // returns 0 on success, -1 when fewer bits remain than the block needs
+{
+    int i;
+
+    if (get_bits_left(gb) < 2+1+5 + 32 + 4 + 43 + 1) // 88 bits: space/tier/idc, 32 compatibility flags, 4 source flags, 43 constraint/reserved bits, 1 inbld/reserved bit
+        return -1;
+
+    ptl->profile_space = get_bits(gb, 2);
+    ptl->tier_flag     = get_bits1(gb);
+    ptl->profile_idc   = get_bits(gb, 5);
+    if (ptl->profile_idc == AV_PROFILE_HEVC_MAIN) // logging uses the coded profile_idc, before the fallback from the compatibility flags below
+        av_log(avctx, AV_LOG_DEBUG, "Main profile bitstream\n");
+    else if (ptl->profile_idc == AV_PROFILE_HEVC_MAIN_10)
+        av_log(avctx, AV_LOG_DEBUG, "Main 10 profile bitstream\n");
+    else if (ptl->profile_idc == AV_PROFILE_HEVC_MAIN_STILL_PICTURE)
+        av_log(avctx, AV_LOG_DEBUG, "Main Still Picture profile bitstream\n");
+    else if (ptl->profile_idc == AV_PROFILE_HEVC_REXT)
+        av_log(avctx, AV_LOG_DEBUG, "Range Extension profile bitstream\n");
+    else if (ptl->profile_idc == AV_PROFILE_HEVC_SCC)
+        av_log(avctx, AV_LOG_DEBUG, "Screen Content Coding Extension profile bitstream\n");
+    else
+        av_log(avctx, AV_LOG_WARNING, "Unknown HEVC profile: %d\n", ptl->profile_idc);
+
+    for (i = 0; i < 32; i++) {
+        ptl->profile_compatibility_flag[i] = get_bits1(gb);
+
+        if (ptl->profile_idc == 0 && i > 0 && ptl->profile_compatibility_flag[i]) // profile_idc 0: adopt the lowest non-zero index whose compatibility flag is set
+            ptl->profile_idc = i;
+    }
+    ptl->progressive_source_flag    = get_bits1(gb);
+    ptl->interlaced_source_flag     = get_bits1(gb);
+    ptl->non_packed_constraint_flag = get_bits1(gb);
+    ptl->frame_only_constraint_flag = get_bits1(gb);
+
+#define check_profile_idc(idc) \
+        ptl->profile_idc == idc || ptl->profile_compatibility_flag[idc] /* expansion is unparenthesized: only combine it with || as done below */
+
+    if (check_profile_idc(4) || check_profile_idc(5) || check_profile_idc(6) || // every branch of this if/else chain consumes exactly 43 bits
+        check_profile_idc(7) || check_profile_idc(8) || check_profile_idc(9) ||
+        check_profile_idc(10)) {
+
+        ptl->max_12bit_constraint_flag        = get_bits1(gb);
+        ptl->max_10bit_constraint_flag        = get_bits1(gb);
+        ptl->max_8bit_constraint_flag         = get_bits1(gb);
+        ptl->max_422chroma_constraint_flag    = get_bits1(gb);
+        ptl->max_420chroma_constraint_flag    = get_bits1(gb);
+        ptl->max_monochrome_constraint_flag   = get_bits1(gb);
+        ptl->intra_constraint_flag            = get_bits1(gb);
+        ptl->one_picture_only_constraint_flag = get_bits1(gb);
+        ptl->lower_bit_rate_constraint_flag   = get_bits1(gb);
+
+        if (check_profile_idc(5) || check_profile_idc(9) || check_profile_idc(10)) {
+            ptl->max_14bit_constraint_flag    = get_bits1(gb);
+            skip_bits_long(gb, 33); // XXX_reserved_zero_33bits[0..32]
+        } else {
+            skip_bits_long(gb, 34); // XXX_reserved_zero_34bits[0..33]
+        }
+    } else if (check_profile_idc(2)) {
+        skip_bits(gb, 7);
+        ptl->one_picture_only_constraint_flag = get_bits1(gb);
+        skip_bits_long(gb, 35); // XXX_reserved_zero_35bits[0..34]
+    } else {
+        skip_bits_long(gb, 43); // XXX_reserved_zero_43bits[0..42]
+    }
+
+    if (check_profile_idc(1) || check_profile_idc(2) || check_profile_idc(3) || // final bit: inbld_flag for these profiles, otherwise skipped
+        check_profile_idc(4) || check_profile_idc(5) || check_profile_idc(9))
+        ptl->inbld_flag = get_bits1(gb);
+    else
+        skip_bits1(gb);
+#undef check_profile_idc
+
+    return 0;
+}
+
+static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx, // parse general and per-sub-layer profile/tier/level data into *ptl
+                      PTL *ptl, int max_num_sub_layers) // returns 0 on success, -1 (after logging) when the data is too short
+{
+    int i;
+    if (decode_profile_tier_level(gb, avctx, &ptl->general_ptl) < 0 ||
+        get_bits_left(gb) < 8 + (8*2 * (max_num_sub_layers - 1 > 0))) { // 8 bits of general level_idc, plus 8 two-bit slots when any sub-layer exists
+        av_log(avctx, AV_LOG_ERROR, "PTL information too short\n");
+        return -1;
+    }
+
+    ptl->general_ptl.level_idc = get_bits(gb, 8);
+
+    for (i = 0; i < max_num_sub_layers - 1; i++) { // one pair of presence flags per sub-layer below the highest
+        ptl->sub_layer_profile_present_flag[i] = get_bits1(gb);
+        ptl->sub_layer_level_present_flag[i]   = get_bits1(gb);
+    }
+
+    if (max_num_sub_layers - 1> 0) // the flag pairs are padded to 8 two-bit slots in total
+        for (i = max_num_sub_layers - 1; i < 8; i++)
+            skip_bits(gb, 2); // reserved_zero_2bits[i]
+    for (i = 0; i < max_num_sub_layers - 1; i++) {
+        if (ptl->sub_layer_profile_present_flag[i] &&
+            decode_profile_tier_level(gb, avctx, &ptl->sub_layer_ptl[i]) < 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "PTL information for sublayer %i too short\n", i);
+            return -1;
+        }
+        if (ptl->sub_layer_level_present_flag[i]) {
+            if (get_bits_left(gb) < 8) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Not enough data for sublayer %i level_idc\n", i);
+                return -1;
+            } else
+                ptl->sub_layer_ptl[i].level_idc = get_bits(gb, 8);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Read the per-CPB HRD values of one sub-layer into @p par.
+ *
+ * For each of the @p nb_cpb entries the bit rate and CPB size are read,
+ * followed by their decoding-unit counterparts when
+ * @p subpic_params_present is set, and finally one flag bit that is OR-ed
+ * into bit <entry index> of par->cbr_flag.
+ */
+static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb,
+                                HEVCSublayerHdrParams *par, int subpic_params_present)
+{
+    for (int cpb = 0; cpb < nb_cpb; cpb++) {
+        par->bit_rate_value_minus1[cpb] = get_ue_golomb_long(gb);
+        par->cpb_size_value_minus1[cpb] = get_ue_golomb_long(gb);
+
+        /* the decoding-unit values are only coded with sub-picture HRD parameters */
+        if (subpic_params_present) {
+            par->cpb_size_du_value_minus1[cpb] = get_ue_golomb_long(gb);
+            par->bit_rate_du_value_minus1[cpb] = get_ue_golomb_long(gb);
+        }
+
+        par->cbr_flag |= get_bits1(gb) << cpb;
+    }
+}
+
+/**
+ * Parse one HRD parameter structure into @p hdr.
+ *
+ * @param gb                 bit reader positioned at the start of the structure
+ * @param common_inf_present when non-zero, the common (not per-sub-layer)
+ *                           fields are read from the bitstream; when zero they
+ *                           are used as already stored in @p hdr
+ * @param hdr                destination; the per-sub-layer flags are OR-ed into
+ *                           bitmasks, so it is presumably expected to start
+ *                           zeroed (the caller in this file allocates it with
+ *                           av_calloc())
+ * @param max_sublayers      number of sub-layer entries to parse
+ * @return 0 on success, AVERROR_INVALIDDATA when a cpb_cnt_minus1 value
+ *         exceeds 31
+ *
+ * NOTE(review): with common_inf_present == 0 the *_hrd_parameters_present
+ * flags tested at the end of each iteration are whatever @p hdr already holds;
+ * nothing here copies them from a previously parsed structure -- confirm that
+ * this is intended.
+ */
+static int decode_hrd(GetBitContext *gb, int common_inf_present,
+                      HEVCHdrParams *hdr, int max_sublayers)
+{
+    if (common_inf_present) {
+        hdr->nal_hrd_parameters_present_flag = get_bits1(gb);
+        hdr->vcl_hrd_parameters_present_flag = get_bits1(gb);
+
+        if (hdr->nal_hrd_parameters_present_flag ||
+            hdr->vcl_hrd_parameters_present_flag) {
+            hdr->sub_pic_hrd_params_present_flag = get_bits1(gb);
+
+            if (hdr->sub_pic_hrd_params_present_flag) {
+                hdr->tick_divisor_minus2 = get_bits(gb, 8);
+                hdr->du_cpb_removal_delay_increment_length_minus1 = get_bits(gb, 5);
+                hdr->sub_pic_cpb_params_in_pic_timing_sei_flag = get_bits1(gb);
+                hdr->dpb_output_delay_du_length_minus1 = get_bits(gb, 5);
+            }
+
+            hdr->bit_rate_scale = get_bits(gb, 4);
+            hdr->cpb_size_scale = get_bits(gb, 4);
+
+            if (hdr->sub_pic_hrd_params_present_flag)
+                hdr->cpb_size_du_scale = get_bits(gb, 4);
+
+            hdr->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+            hdr->au_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+            hdr->dpb_output_delay_length_minus1 = get_bits(gb, 5);
+        }
+    }
+
+    for (int i = 0; i < max_sublayers; i++) {
+        unsigned fixed_pic_rate_general_flag = get_bits1(gb);
+        unsigned fixed_pic_rate_within_cvs_flag = 0;
+        unsigned low_delay_hrd_flag = 0;
+        /* each per-sub-layer flag is stored as bit i of its bitmask */
+        hdr->flags.fixed_pic_rate_general_flag |= fixed_pic_rate_general_flag << i;
+
+        /* within_cvs is only coded when the general flag is clear */
+        if (!fixed_pic_rate_general_flag)
+            fixed_pic_rate_within_cvs_flag = get_bits1(gb);
+        hdr->flags.fixed_pic_rate_within_cvs_flag |= fixed_pic_rate_within_cvs_flag << i;
+
+        if (fixed_pic_rate_within_cvs_flag || fixed_pic_rate_general_flag)
+            hdr->elemental_duration_in_tc_minus1[i] = get_ue_golomb_long(gb);
+        else
+            low_delay_hrd_flag = get_bits1(gb);
+        hdr->flags.low_delay_hrd_flag |= low_delay_hrd_flag << i;
+
+        /* with low delay set, cpb_cnt_minus1[i] keeps its previous contents */
+        if (!low_delay_hrd_flag) {
+            unsigned cpb_cnt_minus1 = get_ue_golomb_long(gb);
+            if (cpb_cnt_minus1 > 31) {
+                /* the value is unsigned, so it is printed with %u */
+                av_log(NULL, AV_LOG_ERROR, "nb_cpb %u invalid\n",
+                       cpb_cnt_minus1);
+                return AVERROR_INVALIDDATA;
+            }
+            hdr->cpb_cnt_minus1[i] = cpb_cnt_minus1;
+        }
+
+        if (hdr->nal_hrd_parameters_present_flag)
+            decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i]+1, &hdr->nal_params[i],
+                                hdr->sub_pic_hrd_params_present_flag);
+
+        if (hdr->vcl_hrd_parameters_present_flag)
+            decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i]+1, &hdr->vcl_params[i],
+                                hdr->sub_pic_hrd_params_present_flag);
+    }
+
+    return 0;
+}
+
+/*
+ * Free callback registered with ff_refstruct_alloc_ext() for HEVCVPS objects:
+ * releases the two buffers the VPS points to (its raw data copy and its HRD
+ * array). The opaque argument is unused.
+ */
+static void hevc_vps_free(FFRefStructOpaque opaque, void *obj)
+{
+    HEVCVPS *const vps = obj;
+
+    av_freep(&vps->data);
+    av_freep(&vps->hdr);
+}
+
+/**
+ * Parse a VPS from @p gb and store it in ps->vps_list.
+ *
+ * If a VPS with the same id is already stored and its saved raw bytes equal
+ * the current buffer, nothing is parsed and 0 is returned. Otherwise a new
+ * HEVCVPS is allocated, filled in, and put in place of any previous VPS with
+ * that id; replacing a VPS goes through remove_vps(), which also drops the
+ * SPS that referred to it.
+ *
+ * @param gb    bit reader covering the VPS payload
+ * @param avctx context used for logging and for the err_recognition flags
+ * @param ps    parameter-set storage to update
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
+ *         AVERROR_INVALIDDATA on invalid syntax
+ */
+int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps)
+{
+    int i,j;
+    int vps_id = get_bits(gb, 4);
+    ptrdiff_t nal_size = gb->buffer_end - gb->buffer;
+    int ret = AVERROR_INVALIDDATA;
+    HEVCVPS *vps;
+
+    /* an identical VPS is already stored: nothing to do */
+    if (ps->vps_list[vps_id]) {
+        const HEVCVPS *vps1 = ps->vps_list[vps_id];
+        if (vps1->data_size == nal_size &&
+            !memcmp(vps1->data, gb->buffer, vps1->data_size))
+            return 0;
+    }
+
+    vps = ff_refstruct_alloc_ext(sizeof(*vps), 0, NULL, hevc_vps_free);
+    if (!vps)
+        return AVERROR(ENOMEM);
+
+    av_log(avctx, AV_LOG_DEBUG, "Decoding VPS\n");
+
+    /* keep a copy of the raw bytes for the duplicate check above */
+    vps->data_size = nal_size;
+    vps->data = av_memdup(gb->buffer, nal_size);
+    if (!vps->data) {
+        ret = AVERROR(ENOMEM);
+        goto err;
+    }
+    vps->vps_id = vps_id;
+
+    if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
+        av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
+        goto err;
+    }
+
+    vps->vps_max_layers               = get_bits(gb, 6) + 1;
+    vps->vps_max_sub_layers           = get_bits(gb, 3) + 1;
+    vps->vps_temporal_id_nesting_flag = get_bits1(gb);
+
+    if (get_bits(gb, 16) != 0xffff) { // vps_reserved_ffff_16bits
+        av_log(avctx, AV_LOG_ERROR, "vps_reserved_ffff_16bits is not 0xffff\n");
+        goto err;
+    }
+
+    if (vps->vps_max_sub_layers > HEVC_MAX_SUB_LAYERS) {
+        av_log(avctx, AV_LOG_ERROR, "vps_max_sub_layers out of range: %d\n",
+               vps->vps_max_sub_layers);
+        goto err;
+    }
+
+    if (parse_ptl(gb, avctx, &vps->ptl, vps->vps_max_sub_layers) < 0)
+        goto err;
+
+    vps->vps_sub_layer_ordering_info_present_flag = get_bits1(gb);
+
+    /* without the flag only the entry for the highest sub-layer is coded */
+    i = vps->vps_sub_layer_ordering_info_present_flag ? 0 : vps->vps_max_sub_layers - 1;
+    for (; i < vps->vps_max_sub_layers; i++) {
+        vps->vps_max_dec_pic_buffering[i] = get_ue_golomb_long(gb) + 1;
+        vps->vps_num_reorder_pics[i]      = get_ue_golomb_long(gb);
+        vps->vps_max_latency_increase[i]  = get_ue_golomb_long(gb) - 1;
+
+        if (vps->vps_max_dec_pic_buffering[i] > HEVC_MAX_DPB_SIZE || !vps->vps_max_dec_pic_buffering[i]) {
+            av_log(avctx, AV_LOG_ERROR, "vps_max_dec_pic_buffering_minus1 out of range: %d\n",
+                   vps->vps_max_dec_pic_buffering[i] - 1);
+            goto err;
+        }
+        /* only a warning unless AV_EF_EXPLODE is requested */
+        if (vps->vps_num_reorder_pics[i] > vps->vps_max_dec_pic_buffering[i] - 1) {
+            av_log(avctx, AV_LOG_WARNING, "vps_max_num_reorder_pics out of range: %d\n",
+                   vps->vps_num_reorder_pics[i]);
+            if (avctx->err_recognition & AV_EF_EXPLODE)
+                goto err;
+        }
+    }
+
+    vps->vps_max_layer_id   = get_bits(gb, 6);
+    vps->vps_num_layer_sets = get_ue_golomb_long(gb) + 1;
+    /* also make sure the flags skipped below fit in the remaining data */
+    if (vps->vps_num_layer_sets < 1 || vps->vps_num_layer_sets > 1024 ||
+        (vps->vps_num_layer_sets - 1LL) * (vps->vps_max_layer_id + 1LL) > get_bits_left(gb)) {
+        av_log(avctx, AV_LOG_ERROR, "too many layer_id_included_flags\n");
+        goto err;
+    }
+
+    for (i = 1; i < vps->vps_num_layer_sets; i++)
+        for (j = 0; j <= vps->vps_max_layer_id; j++)
+            skip_bits(gb, 1);  // layer_id_included_flag[i][j]
+
+    vps->vps_timing_info_present_flag = get_bits1(gb);
+    if (vps->vps_timing_info_present_flag) {
+        vps->vps_num_units_in_tick               = get_bits_long(gb, 32);
+        vps->vps_time_scale                      = get_bits_long(gb, 32);
+        vps->vps_poc_proportional_to_timing_flag = get_bits1(gb);
+        if (vps->vps_poc_proportional_to_timing_flag)
+            vps->vps_num_ticks_poc_diff_one = get_ue_golomb_long(gb) + 1;
+        vps->vps_num_hrd_parameters = get_ue_golomb_long(gb);
+        if (vps->vps_num_hrd_parameters > (unsigned)vps->vps_num_layer_sets) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "vps_num_hrd_parameters %d is invalid\n", vps->vps_num_hrd_parameters);
+            goto err;
+        }
+
+        if (vps->vps_num_hrd_parameters) {
+            vps->hdr = av_calloc(vps->vps_num_hrd_parameters, sizeof(*vps->hdr));
+            if (!vps->hdr) {
+                /* report the allocation failure as such, not as invalid data */
+                ret = AVERROR(ENOMEM);
+                goto err;
+            }
+        }
+
+        for (i = 0; i < vps->vps_num_hrd_parameters; i++) {
+            int common_inf_present = 1;
+
+            get_ue_golomb_long(gb); // hrd_layer_set_idx
+            if (i)
+                common_inf_present = get_bits1(gb);
+            /* NOTE(review): the return value is ignored, so an HRD parse
+             * error does not reject the VPS -- confirm this is intended */
+            decode_hrd(gb, common_inf_present, &vps->hdr[i],
+                       vps->vps_max_sub_layers);
+        }
+    }
+    get_bits1(gb); /* vps_extension_flag */
+
+    /* an overread VPS is rejected only if an earlier VPS with this id can
+     * be kept instead; otherwise it is stored despite the error */
+    if (get_bits_left(gb) < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Overread VPS by %d bits\n", -get_bits_left(gb));
+        if (ps->vps_list[vps_id])
+            goto err;
+    }
+
+    remove_vps(ps, vps_id);
+    ps->vps_list[vps_id] = vps;
+
+    return 0;
+
+err:
+    ff_refstruct_unref(&vps);
+    return ret;
+}
+
+/**
+ * Parse the HEVC-specific part of the SPS VUI into sps->vui.
+ *
+ * The fields shared with other H.26x codecs are read first by
+ * ff_h2645_decode_common_vui_params(); the signalled range / matrix may then
+ * switch sps->pix_fmt to the full-range or GBR variant.
+ *
+ * Some streams use an alternate layout.  The reader and the VUI are therefore
+ * snapshotted right before default_display_window_flag; if the regular parse
+ * runs short of bits, both are rewound and parsing is retried exactly once,
+ * starting at the timing information.
+ *
+ * @param apply_defdispwin together with AV_CODEC_FLAG2_IGNORE_CROP this makes
+ *                         a parsed default display window get zeroed
+ */
+static void decode_vui(GetBitContext *gb, AVCodecContext *avctx,
+                       int apply_defdispwin, HEVCSPS *sps)
+{
+    VUI *vui = &sps->vui;
+    VUI saved_vui;
+    GetBitContext saved_gb;
+    int retried = 0;
+
+    ff_h2645_decode_common_vui_params(gb, &sps->vui.common, avctx);
+
+    if (vui->common.video_signal_type_present_flag) {
+        if (vui->common.video_full_range_flag && sps->pix_fmt == AV_PIX_FMT_YUV420P)
+            sps->pix_fmt = AV_PIX_FMT_YUVJ420P;
+
+        // RGB matrix coefficients turn the 4:4:4 formats into planar GBR
+        if (vui->common.colour_description_present_flag &&
+            vui->common.matrix_coeffs == AVCOL_SPC_RGB) {
+            if (sps->pix_fmt == AV_PIX_FMT_YUV444P)
+                sps->pix_fmt = AV_PIX_FMT_GBRP;
+            else if (sps->pix_fmt == AV_PIX_FMT_YUV444P10)
+                sps->pix_fmt = AV_PIX_FMT_GBRP10;
+            else if (sps->pix_fmt == AV_PIX_FMT_YUV444P12)
+                sps->pix_fmt = AV_PIX_FMT_GBRP12;
+        }
+    }
+
+    vui->neutra_chroma_indication_flag = get_bits1(gb);
+    vui->field_seq_flag                = get_bits1(gb);
+    vui->frame_field_info_present_flag = get_bits1(gb);
+
+    // Snapshot reader and VUI so an alternate header can be re-parsed from here
+    saved_gb  = *gb;
+    saved_vui = *vui;
+
+    if (get_bits_left(gb) >= 68 && show_bits(gb, 21) == 0x100000) {
+        vui->default_display_window_flag = 0;
+        av_log(avctx, AV_LOG_WARNING, "Invalid default display window\n");
+    } else {
+        vui->default_display_window_flag = get_bits1(gb);
+    }
+
+    if (vui->default_display_window_flag) {
+        HEVCWindow *win      = &vui->def_disp_win;
+        const int vert_mult  = hevc_sub_height_c[sps->chroma_format_idc];
+        const int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc];
+
+        // coded offsets are scaled by the chroma subsampling factors
+        win->left_offset   = get_ue_golomb_long(gb) * horiz_mult;
+        win->right_offset  = get_ue_golomb_long(gb) * horiz_mult;
+        win->top_offset    = get_ue_golomb_long(gb) *  vert_mult;
+        win->bottom_offset = get_ue_golomb_long(gb) *  vert_mult;
+
+        if (apply_defdispwin &&
+            (avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP)) {
+            av_log(avctx, AV_LOG_DEBUG,
+                   "discarding vui default display window, "
+                   "original values are l:%u r:%u t:%u b:%u\n",
+                   win->left_offset,
+                   win->right_offset,
+                   win->top_offset,
+                   win->bottom_offset);
+
+            win->bottom_offset = 0;
+            win->top_offset    = 0;
+            win->right_offset  = 0;
+            win->left_offset   = 0;
+        }
+    }
+
+timing_info:
+    vui->vui_timing_info_present_flag = get_bits1(gb);
+
+    if (vui->vui_timing_info_present_flag) {
+        if (get_bits_left(gb) < 66 && !retried) {
+            // In the alternate syntax the timing info appears to sit where
+            // def_disp_win is normally located
+            av_log(avctx, AV_LOG_WARNING,
+                   "Strange VUI timing information, retrying...\n");
+            *vui    = saved_vui;
+            *gb     = saved_gb;
+            retried = 1;
+            goto timing_info;
+        }
+        vui->vui_num_units_in_tick = get_bits_long(gb, 32);
+        vui->vui_time_scale        = get_bits_long(gb, 32);
+        if (retried) {
+            av_log(avctx, AV_LOG_INFO, "Retry got %"PRIu32"/%"PRIu32" fps\n",
+                   vui->vui_time_scale, vui->vui_num_units_in_tick);
+        }
+        vui->vui_poc_proportional_to_timing_flag = get_bits1(gb);
+        if (vui->vui_poc_proportional_to_timing_flag)
+            vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb);
+        vui->vui_hrd_parameters_present_flag = get_bits1(gb);
+        if (vui->vui_hrd_parameters_present_flag)
+            decode_hrd(gb, 1, &sps->hdr, sps->max_sub_layers);
+    }
+
+    vui->bitstream_restriction_flag = get_bits1(gb);
+    if (vui->bitstream_restriction_flag) {
+        if (get_bits_left(gb) < 8 && !retried) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Strange VUI bitstream restriction information, retrying"
+                   " from timing information...\n");
+            *vui    = saved_vui;
+            *gb     = saved_gb;
+            retried = 1;
+            goto timing_info;
+        }
+        vui->tiles_fixed_structure_flag              = get_bits1(gb);
+        vui->motion_vectors_over_pic_boundaries_flag = get_bits1(gb);
+        vui->restricted_ref_pic_lists_flag           = get_bits1(gb);
+        vui->min_spatial_segmentation_idc            = get_ue_golomb_long(gb);
+        vui->max_bytes_per_pic_denom                 = get_ue_golomb_long(gb);
+        vui->max_bits_per_min_cu_denom               = get_ue_golomb_long(gb);
+        vui->log2_max_mv_length_horizontal           = get_ue_golomb_long(gb);
+        vui->log2_max_mv_length_vertical             = get_ue_golomb_long(gb);
+    }
+
+    if (get_bits_left(gb) < 1 && !retried) {
+        // XXX: Alternate syntax when sps_range_extension_flag != 0?
+        av_log(avctx, AV_LOG_WARNING,
+               "Overread in VUI, retrying from timing information...\n");
+        *vui    = saved_vui;
+        *gb     = saved_gb;
+        retried = 1;
+        goto timing_info;
+    }
+}
+
+/**
+ * Fill sl with the default scaling lists: a flat 16 for every size-id-0 (4x4)
+ * matrix and for both DC coefficient sets, and the default intra / inter
+ * tables for size ids 1 to 3.
+ */
+static void set_default_scaling_list_data(ScalingList *sl)
+{
+    // 4x4 matrices and the 16x16 / 32x32 DC terms all default to 16
+    for (int m = 0; m < 6; m++) {
+        memset(sl->sl[0][m], 16, 16);
+        sl->sl_dc[0][m] = 16;
+        sl->sl_dc[1][m] = 16;
+    }
+
+    // larger sizes: matrices 0-2 use the intra table, 3-5 the inter table
+    for (int size_id = 1; size_id < 4; size_id++)
+        for (int m = 0; m < 6; m++)
+            memcpy(sl->sl[size_id][m],
+                   m < 3 ? default_scaling_list_intra
+                         : default_scaling_list_inter,
+                   64);
+}
+
+/**
+ * Parse scaling_list_data() from gb into sl.
+ *
+ * For each size id / matrix id pair the list is either
+ *  - predicted: a zero delta keeps whatever is already stored in sl (the
+ *    caller is expected to have loaded the defaults), a non-zero delta copies
+ *    an earlier matrix of the same size id (and its DC term for size ids 2
+ *    and 3), or
+ *  - coded explicitly as a sequence of signed deltas, stored through the
+ *    4x4 / 8x8 diagonal scan tables.
+ * Size id 3 only codes matrices 0 and 3; when chroma_format_idc is 3 its
+ * matrices 1, 2, 4 and 5 are copied from size id 2 afterwards.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA if a reference delta points
+ *         before matrix 0 or a DC coefficient is out of range
+ */
+static int scaling_list_data(GetBitContext *gb, AVCodecContext *avctx,
+                             ScalingList *sl, const HEVCSPS *sps)
+{
+    for (int size_id = 0; size_id < 4; size_id++) {
+        const int step = (size_id == 3) ? 3 : 1;
+
+        for (int matrix_id = 0; matrix_id < 6; matrix_id += step) {
+            const int scaling_list_pred_mode_flag = get_bits1(gb);
+
+            if (!scaling_list_pred_mode_flag) {
+                /* Kept in 64 bits: the coded value can be close to 2^32 and
+                 * scaling it by 3 in 32 bits could wrap around to a small,
+                 * seemingly valid delta. */
+                uint64_t delta = get_ue_golomb_long(gb);
+
+                /* Only need to handle non-zero delta. Zero means default,
+                 * which should already be in the arrays. */
+                if (delta) {
+                    int ref_id;
+
+                    // Copy from previous array.
+                    delta *= step;
+                    if (matrix_id < delta) {
+                        av_log(avctx, AV_LOG_ERROR,
+                               "Invalid delta in scaling list data: %"PRIu64".\n",
+                               delta);
+                        return AVERROR_INVALIDDATA;
+                    }
+                    ref_id = matrix_id - (int)delta;
+
+                    memcpy(sl->sl[size_id][matrix_id],
+                           sl->sl[size_id][ref_id],
+                           size_id > 0 ? 64 : 16);
+                    if (size_id > 1)
+                        sl->sl_dc[size_id - 2][matrix_id] = sl->sl_dc[size_id - 2][ref_id];
+                }
+            } else {
+                const int coef_num = FFMIN(64, 1 << (4 + (size_id << 1)));
+                int next_coef = 8;
+
+                if (size_id > 1) {
+                    // explicit DC term, coded as value minus 8
+                    int scaling_list_coeff_minus8 = get_se_golomb(gb);
+                    if (scaling_list_coeff_minus8 < -7 ||
+                        scaling_list_coeff_minus8 > 247)
+                        return AVERROR_INVALIDDATA;
+                    next_coef = scaling_list_coeff_minus8 + 8;
+                    sl->sl_dc[size_id - 2][matrix_id] = next_coef;
+                }
+                for (int i = 0; i < coef_num; i++) {
+                    int32_t scaling_list_delta_coef;
+                    int pos;
+
+                    if (size_id == 0)
+                        pos = 4 * ff_hevc_diag_scan4x4_y[i] +
+                                  ff_hevc_diag_scan4x4_x[i];
+                    else
+                        pos = 8 * ff_hevc_diag_scan8x8_y[i] +
+                                  ff_hevc_diag_scan8x8_x[i];
+
+                    // coefficients are accumulated modulo 256
+                    scaling_list_delta_coef = get_se_golomb(gb);
+                    next_coef = (next_coef + 256U + scaling_list_delta_coef) % 256;
+                    sl->sl[size_id][matrix_id][pos] = next_coef;
+                }
+            }
+        }
+    }
+
+    if (sps->chroma_format_idc == 3) {
+        // size id 3 does not code matrices 1, 2, 4, 5: take them from size id 2
+        for (int i = 0; i < 64; i++) {
+            sl->sl[3][1][i] = sl->sl[2][1][i];
+            sl->sl[3][2][i] = sl->sl[2][2][i];
+            sl->sl[3][4][i] = sl->sl[2][4][i];
+            sl->sl[3][5][i] = sl->sl[2][5][i];
+        }
+        sl->sl_dc[1][1] = sl->sl_dc[0][1];
+        sl->sl_dc[1][2] = sl->sl_dc[0][2];
+        sl->sl_dc[1][4] = sl->sl_dc[0][4];
+        sl->sl_dc[1][5] = sl->sl_dc[0][5];
+    }
+
+    return 0;
+}
+
+/**
+ * Derive sps->pix_fmt from sps->bit_depth and sps->chroma_format_idc, then
+ * fill the per-plane chroma shifts and pixel_shift from that format.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA for an unsupported bit depth,
+ *         AVERROR(EINVAL) if the chosen format has no descriptor
+ */
+static int map_pixel_format(AVCodecContext *avctx, HEVCSPS *sps)
+{
+    // one row per supported bit depth, indexed by chroma_format_idc
+    static const enum AVPixelFormat fmts_8[4] = {
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+    };
+    static const enum AVPixelFormat fmts_9[4] = {
+        AV_PIX_FMT_GRAY9, AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+    };
+    static const enum AVPixelFormat fmts_10[4] = {
+        AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+    };
+    static const enum AVPixelFormat fmts_12[4] = {
+        AV_PIX_FMT_GRAY12, AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
+    };
+    const enum AVPixelFormat *row;
+    const AVPixFmtDescriptor *desc;
+
+    switch (sps->bit_depth) {
+    case  8: row = fmts_8;  break;
+    case  9: row = fmts_9;  break;
+    case 10: row = fmts_10; break;
+    case 12: row = fmts_12; break;
+    default:
+        av_log(avctx, AV_LOG_ERROR,
+               "The following bit-depths are currently specified: 8, 9, 10 and 12 bits, "
+               "chroma_format_idc is %d, depth is %d\n",
+               sps->chroma_format_idc, sps->bit_depth);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // any other chroma_format_idc leaves pix_fmt untouched
+    if (sps->chroma_format_idc >= 0 && sps->chroma_format_idc <= 3)
+        sps->pix_fmt = row[sps->chroma_format_idc];
+
+    desc = av_pix_fmt_desc_get(sps->pix_fmt);
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    // luma is never subsampled; both chroma planes share the same shifts
+    sps->hshift[0] = 0;
+    sps->vshift[0] = 0;
+    sps->hshift[1] = desc->log2_chroma_w;
+    sps->hshift[2] = desc->log2_chroma_w;
+    sps->vshift[1] = desc->log2_chroma_h;
+    sps->vshift[2] = desc->log2_chroma_h;
+
+    sps->pixel_shift = sps->bit_depth > 8;
+
+    return 0;
+}
+
+/**
+ * Parse a sequence parameter set from gb into sps and derive the dependent
+ * ("inferred") fields.
+ *
+ * @param sps              destination. Several fields are only written when
+ *                         the matching syntax element is present, so this
+ *                         presumably expects a zero-initialised struct
+ *                         (TODO: confirm against all callers).
+ * @param gb               bit reader positioned at the start of the SPS payload
+ * @param sps_id           receives the parsed SPS id
+ * @param apply_defdispwin if non-zero, the VUI default display window is
+ *                         added to the output (cropping) window
+ * @param vps_list         optional; when non-NULL the VPS referenced by the
+ *                         SPS must be present in it
+ * @param avctx            used for logging; flags2 (IGNORE_CROP) and
+ *                         err_recognition (EXPLODE) influence the result
+ * @return 0 on success, a negative AVERROR code on invalid data
+ */
+int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
+                      int apply_defdispwin, const HEVCVPS * const *vps_list,
+                      AVCodecContext *avctx)
+{
+    HEVCWindow *ow;
+    int ret = 0;
+    int bit_depth_chroma, start, num_comps;
+    int i;
+
+    // Coded parameters
+
+    sps->vps_id = get_bits(gb, 4);
+
+    if (vps_list && !vps_list[sps->vps_id]) {
+        av_log(avctx, AV_LOG_ERROR, "VPS %d does not exist\n",
+               sps->vps_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    sps->max_sub_layers = get_bits(gb, 3) + 1;
+    if (sps->max_sub_layers > HEVC_MAX_SUB_LAYERS) {
+        av_log(avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
+               sps->max_sub_layers);
+        return AVERROR_INVALIDDATA;
+    }
+
+    sps->temporal_id_nesting = get_bits(gb, 1);
+
+    if ((ret = parse_ptl(gb, avctx, &sps->ptl, sps->max_sub_layers)) < 0)
+        return ret;
+
+    *sps_id = get_ue_golomb_long(gb);
+    if (*sps_id >= HEVC_MAX_SPS_COUNT) {
+        av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %u\n", *sps_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    sps->chroma_format_idc = get_ue_golomb_long(gb);
+    if (sps->chroma_format_idc > 3U) {
+        av_log(avctx, AV_LOG_ERROR, "chroma_format_idc %d is invalid\n", sps->chroma_format_idc);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sps->chroma_format_idc == 3)
+        sps->separate_colour_plane = get_bits1(gb);
+
+    // with separate colour planes each plane is handled like monochrome
+    if (sps->separate_colour_plane)
+        sps->chroma_format_idc = 0;
+
+    sps->width  = get_ue_golomb_long(gb);
+    sps->height = get_ue_golomb_long(gb);
+    if ((ret = av_image_check_size(sps->width,
+                                   sps->height, 0, avctx)) < 0)
+        return ret;
+
+    sps->conformance_window = get_bits1(gb);
+    if (sps->conformance_window) {
+        int vert_mult  = hevc_sub_height_c[sps->chroma_format_idc];
+        int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc];
+        sps->pic_conf_win.left_offset   = get_ue_golomb_long(gb) * horiz_mult;
+        sps->pic_conf_win.right_offset  = get_ue_golomb_long(gb) * horiz_mult;
+        sps->pic_conf_win.top_offset    = get_ue_golomb_long(gb) *  vert_mult;
+        sps->pic_conf_win.bottom_offset = get_ue_golomb_long(gb) *  vert_mult;
+
+        if (avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP) {
+            av_log(avctx, AV_LOG_DEBUG,
+                   "discarding sps conformance window, "
+                   "original values are l:%u r:%u t:%u b:%u\n",
+                   sps->pic_conf_win.left_offset,
+                   sps->pic_conf_win.right_offset,
+                   sps->pic_conf_win.top_offset,
+                   sps->pic_conf_win.bottom_offset);
+
+            sps->pic_conf_win.left_offset   =
+            sps->pic_conf_win.right_offset  =
+            sps->pic_conf_win.top_offset    =
+            sps->pic_conf_win.bottom_offset = 0;
+        }
+        sps->output_window = sps->pic_conf_win;
+    }
+
+    sps->bit_depth = get_ue_golomb_31(gb) + 8;
+    if (sps->bit_depth > 16) {
+        av_log(avctx, AV_LOG_ERROR, "Luma bit depth (%d) is out of range\n",
+               sps->bit_depth);
+        return AVERROR_INVALIDDATA;
+    }
+    bit_depth_chroma = get_ue_golomb_31(gb) + 8;
+    if (bit_depth_chroma > 16) {
+        av_log(avctx, AV_LOG_ERROR, "Chroma bit depth (%d) is out of range\n",
+               bit_depth_chroma);
+        return AVERROR_INVALIDDATA;
+    }
+    if (sps->chroma_format_idc && bit_depth_chroma != sps->bit_depth) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Luma bit depth (%d) is different from chroma bit depth (%d), "
+               "this is unsupported.\n",
+               sps->bit_depth, bit_depth_chroma);
+        return AVERROR_INVALIDDATA;
+    }
+    sps->bit_depth_chroma = bit_depth_chroma;
+
+    ret = map_pixel_format(avctx, sps);
+    if (ret < 0)
+        return ret;
+
+    sps->log2_max_poc_lsb = get_ue_golomb_long(gb) + 4;
+    if (sps->log2_max_poc_lsb > 16) {
+        av_log(avctx, AV_LOG_ERROR, "log2_max_pic_order_cnt_lsb_minus4 out range: %d\n",
+               sps->log2_max_poc_lsb - 4);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // without sublayer ordering info only the highest sub-layer is coded
+    sps->sublayer_ordering_info = get_bits1(gb);
+    start = sps->sublayer_ordering_info ? 0 : sps->max_sub_layers - 1;
+    for (i = start; i < sps->max_sub_layers; i++) {
+        sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1;
+        sps->temporal_layer[i].num_reorder_pics      = get_ue_golomb_long(gb);
+        sps->temporal_layer[i].max_latency_increase  = get_ue_golomb_long(gb) - 1;
+        if (sps->temporal_layer[i].max_dec_pic_buffering > (unsigned)HEVC_MAX_DPB_SIZE) {
+            av_log(avctx, AV_LOG_ERROR, "sps_max_dec_pic_buffering_minus1 out of range: %d\n",
+                   sps->temporal_layer[i].max_dec_pic_buffering - 1U);
+            return AVERROR_INVALIDDATA;
+        }
+        if (sps->temporal_layer[i].num_reorder_pics > sps->temporal_layer[i].max_dec_pic_buffering - 1) {
+            av_log(avctx, AV_LOG_WARNING, "sps_max_num_reorder_pics out of range: %d\n",
+                   sps->temporal_layer[i].num_reorder_pics);
+            if (avctx->err_recognition & AV_EF_EXPLODE ||
+                sps->temporal_layer[i].num_reorder_pics > HEVC_MAX_DPB_SIZE - 1) {
+                return AVERROR_INVALIDDATA;
+            }
+            // tolerate the stream by enlarging the DPB to fit the reorder depth
+            sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[i].num_reorder_pics + 1;
+        }
+    }
+
+    if (!sps->sublayer_ordering_info) {
+        // lower sub-layers inherit the values coded for the highest one
+        for (i = 0; i < start; i++) {
+            sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering;
+            sps->temporal_layer[i].num_reorder_pics      = sps->temporal_layer[start].num_reorder_pics;
+            sps->temporal_layer[i].max_latency_increase  = sps->temporal_layer[start].max_latency_increase;
+        }
+    }
+
+    sps->log2_min_cb_size                       = get_ue_golomb_long(gb) + 3;
+    sps->log2_diff_max_min_coding_block_size    = get_ue_golomb_long(gb);
+    sps->log2_min_tb_size                       = get_ue_golomb_long(gb) + 2;
+    sps->log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb);
+    sps->log2_max_trafo_size                    = sps->log2_diff_max_min_transform_block_size +
+                                                  sps->log2_min_tb_size;
+
+    if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size\n", sps->log2_min_cb_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sps->log2_diff_max_min_coding_block_size > 30) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_coding_block_size\n",
+               sps->log2_diff_max_min_coding_block_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sps->log2_min_tb_size >= sps->log2_min_cb_size || sps->log2_min_tb_size < 2) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid value for log2_min_tb_size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sps->log2_diff_max_min_transform_block_size > 30) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size\n",
+               sps->log2_diff_max_min_transform_block_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    sps->max_transform_hierarchy_depth_inter = get_ue_golomb_long(gb);
+    sps->max_transform_hierarchy_depth_intra = get_ue_golomb_long(gb);
+
+    sps->scaling_list_enabled = get_bits1(gb);
+    if (sps->scaling_list_enabled) {
+        // defaults first; explicit data (if flagged) then overrides them
+        set_default_scaling_list_data(&sps->scaling_list);
+
+        if (get_bits1(gb)) {
+            ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    sps->amp_enabled = get_bits1(gb);
+    sps->sao_enabled = get_bits1(gb);
+
+    sps->pcm_enabled = get_bits1(gb);
+    if (sps->pcm_enabled) {
+        sps->pcm.bit_depth   = get_bits(gb, 4) + 1;
+        sps->pcm.bit_depth_chroma = get_bits(gb, 4) + 1;
+        sps->pcm.log2_min_pcm_cb_size = get_ue_golomb_long(gb) + 3;
+        sps->pcm.log2_max_pcm_cb_size = sps->pcm.log2_min_pcm_cb_size +
+                                        get_ue_golomb_long(gb);
+        if (FFMAX(sps->pcm.bit_depth, sps->pcm.bit_depth_chroma) > sps->bit_depth) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "PCM bit depth (%d, %d) is greater than normal bit depth (%d)\n",
+                   sps->pcm.bit_depth, sps->pcm.bit_depth_chroma, sps->bit_depth);
+            return AVERROR_INVALIDDATA;
+        }
+
+        sps->pcm_loop_filter_disabled = get_bits1(gb);
+    }
+
+    sps->nb_st_rps = get_ue_golomb_long(gb);
+    if (sps->nb_st_rps > HEVC_MAX_SHORT_TERM_REF_PIC_SETS) {
+        av_log(avctx, AV_LOG_ERROR, "Too many short term RPS: %d.\n",
+               sps->nb_st_rps);
+        return AVERROR_INVALIDDATA;
+    }
+    for (i = 0; i < sps->nb_st_rps; i++) {
+        if ((ret = ff_hevc_decode_short_term_rps(gb, avctx, &sps->st_rps[i],
+                                                 sps, 0)) < 0)
+            return ret;
+    }
+
+    sps->long_term_ref_pics_present = get_bits1(gb);
+    if (sps->long_term_ref_pics_present) {
+        sps->num_long_term_ref_pics_sps = get_ue_golomb_long(gb);
+        if (sps->num_long_term_ref_pics_sps > HEVC_MAX_LONG_TERM_REF_PICS) {
+            av_log(avctx, AV_LOG_ERROR, "Too many long term ref pics: %d.\n",
+                   sps->num_long_term_ref_pics_sps);
+            return AVERROR_INVALIDDATA;
+        }
+
+        sps->used_by_curr_pic_lt = 0;
+        for (i = 0; i < sps->num_long_term_ref_pics_sps; i++) {
+            sps->lt_ref_pic_poc_lsb_sps[i]       = get_bits(gb, sps->log2_max_poc_lsb);
+            // unsigned shift: i may go as high as the bound checked above
+            // allows, and shifting a signed 1 into the sign bit is undefined
+            sps->used_by_curr_pic_lt            |= get_bits1(gb) * (1U << i);
+        }
+    }
+
+    sps->temporal_mvp_enabled           = get_bits1(gb);
+    sps->strong_intra_smoothing_enabled = get_bits1(gb);
+    sps->vui.common.sar = (AVRational){0, 1};
+    sps->vui_present = get_bits1(gb);
+    if (sps->vui_present)
+        decode_vui(gb, avctx, apply_defdispwin, sps);
+
+    sps->extension_present = get_bits1(gb);
+    if (sps->extension_present) {
+        sps->range_extension               = get_bits1(gb);
+        sps->multilayer_extension          = get_bits1(gb);
+        sps->sps_3d_extension              = get_bits1(gb);
+        sps->scc_extension                 = get_bits1(gb);
+        skip_bits(gb, 4); // sps_extension_4bits
+
+        if (sps->range_extension) {
+            sps->transform_skip_rotation_enabled = get_bits1(gb);
+            sps->transform_skip_context_enabled  = get_bits1(gb);
+            sps->implicit_rdpcm_enabled          = get_bits1(gb);
+            sps->explicit_rdpcm_enabled          = get_bits1(gb);
+
+            sps->extended_precision_processing   = get_bits1(gb);
+            if (sps->extended_precision_processing)
+                av_log(avctx, AV_LOG_WARNING,
+                       "extended_precision_processing_flag not yet implemented\n");
+
+            sps->intra_smoothing_disabled        = get_bits1(gb);
+            sps->high_precision_offsets_enabled  = get_bits1(gb);
+            if (sps->high_precision_offsets_enabled)
+                av_log(avctx, AV_LOG_WARNING,
+                       "high_precision_offsets_enabled_flag not yet implemented\n");
+
+            sps->persistent_rice_adaptation_enabled = get_bits1(gb);
+
+            sps->cabac_bypass_alignment_enabled     = get_bits1(gb);
+            if (sps->cabac_bypass_alignment_enabled)
+                av_log(avctx, AV_LOG_WARNING,
+                       "cabac_bypass_alignment_enabled_flag not yet implemented\n");
+        }
+
+        if (sps->multilayer_extension) {
+            skip_bits1(gb); // inter_view_mv_vert_constraint_flag
+            av_log(avctx, AV_LOG_WARNING,
+                   "sps_multilayer_extension_flag not yet implemented\n");
+        }
+
+        if (sps->sps_3d_extension) {
+            // the syntax is only skipped over so later extensions stay aligned
+            for (i = 0; i <= 1; i++) {
+                skip_bits1(gb); // iv_di_mc_enabled_flag
+                skip_bits1(gb); // iv_mv_scal_enabled_flag
+                if (i == 0) {
+                    get_ue_golomb_long(gb); // log2_ivmc_sub_pb_size_minus3
+                    skip_bits1(gb); // iv_res_pred_enabled_flag
+                    skip_bits1(gb); // depth_ref_enabled_flag
+                    skip_bits1(gb); // vsp_mc_enabled_flag
+                    skip_bits1(gb); // dbbp_enabled_flag
+                } else {
+                    skip_bits1(gb); // tex_mc_enabled_flag
+                    get_ue_golomb_long(gb); // log2_ivmc_sub_pb_size_minus3
+                    skip_bits1(gb); // intra_contour_enabled_flag
+                    skip_bits1(gb); // intra_dc_only_wedge_enabled_flag
+                    skip_bits1(gb); // cqt_cu_part_pred_enabled_flag
+                    skip_bits1(gb); // inter_dc_only_enabled_flag
+                    skip_bits1(gb); // skip_intra_enabled_flag
+                }
+            }
+            av_log(avctx, AV_LOG_WARNING,
+                   "sps_3d_extension_flag not yet implemented\n");
+        }
+
+        if (sps->scc_extension) {
+            sps->curr_pic_ref_enabled = get_bits1(gb);
+            sps->palette_mode_enabled = get_bits1(gb);
+            if (sps->palette_mode_enabled) {
+                sps->palette_max_size = get_ue_golomb(gb);
+                sps->delta_palette_max_predictor_size = get_ue_golomb(gb);
+                sps->palette_predictor_initializers_present = get_bits1(gb);
+
+                if (sps->palette_predictor_initializers_present) {
+                    sps->sps_num_palette_predictor_initializers = get_ue_golomb(gb) + 1;
+                    if (sps->sps_num_palette_predictor_initializers > HEVC_MAX_PALETTE_PREDICTOR_SIZE) {
+                        av_log(avctx, AV_LOG_ERROR,
+                               "sps_num_palette_predictor_initializers out of range: %u\n",
+                               sps->sps_num_palette_predictor_initializers);
+                        return AVERROR_INVALIDDATA;
+                    }
+                    num_comps = !sps->chroma_format_idc ? 1 : 3;
+                    for (int comp = 0; comp < num_comps; comp++) {
+                        int bit_depth = !comp ? sps->bit_depth : sps->bit_depth_chroma;
+                        for (i = 0; i < sps->sps_num_palette_predictor_initializers; i++)
+                            sps->sps_palette_predictor_initializer[comp][i] = get_bits(gb, bit_depth);
+                    }
+                }
+            }
+            sps->motion_vector_resolution_control_idc   = get_bits(gb, 2);
+            sps->intra_boundary_filtering_disabled      = get_bits1(gb);
+        }
+    }
+    if (apply_defdispwin) {
+        sps->output_window.left_offset   += sps->vui.def_disp_win.left_offset;
+        sps->output_window.right_offset  += sps->vui.def_disp_win.right_offset;
+        sps->output_window.top_offset    += sps->vui.def_disp_win.top_offset;
+        sps->output_window.bottom_offset += sps->vui.def_disp_win.bottom_offset;
+    }
+
+    // reject cropping that overflows or removes the whole picture
+    ow = &sps->output_window;
+    if (ow->left_offset >= INT_MAX - ow->right_offset     ||
+        ow->top_offset  >= INT_MAX - ow->bottom_offset    ||
+        ow->left_offset + ow->right_offset  >= sps->width ||
+        ow->top_offset  + ow->bottom_offset >= sps->height) {
+        av_log(avctx, AV_LOG_WARNING, "Invalid cropping offsets: %u/%u/%u/%u\n",
+               ow->left_offset, ow->right_offset, ow->top_offset, ow->bottom_offset);
+        if (avctx->err_recognition & AV_EF_EXPLODE) {
+            return AVERROR_INVALIDDATA;
+        }
+        av_log(avctx, AV_LOG_WARNING,
+               "Displaying the whole video surface.\n");
+        memset(ow, 0, sizeof(*ow));
+        memset(&sps->pic_conf_win, 0, sizeof(sps->pic_conf_win));
+    }
+
+    // Inferred parameters
+    sps->log2_ctb_size = sps->log2_min_cb_size +
+                         sps->log2_diff_max_min_coding_block_size;
+    sps->log2_min_pu_size = sps->log2_min_cb_size - 1;
+
+    if (sps->log2_ctb_size > HEVC_MAX_LOG2_CTB_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "CTB size out of range: 2^%d\n", sps->log2_ctb_size);
+        return AVERROR_INVALIDDATA;
+    }
+    if (sps->log2_ctb_size < 4) {
+        av_log(avctx,
+               AV_LOG_ERROR,
+               "log2_ctb_size %d differs from the bounds of any known profile\n",
+               sps->log2_ctb_size);
+        avpriv_request_sample(avctx, "log2_ctb_size %d", sps->log2_ctb_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // picture size in CTBs, rounded up
+    sps->ctb_width  = (sps->width  + (1 << sps->log2_ctb_size) - 1) >> sps->log2_ctb_size;
+    sps->ctb_height = (sps->height + (1 << sps->log2_ctb_size) - 1) >> sps->log2_ctb_size;
+    sps->ctb_size   = sps->ctb_width * sps->ctb_height;
+
+    sps->min_cb_width  = sps->width  >> sps->log2_min_cb_size;
+    sps->min_cb_height = sps->height >> sps->log2_min_cb_size;
+    sps->min_tb_width  = sps->width  >> sps->log2_min_tb_size;
+    sps->min_tb_height = sps->height >> sps->log2_min_tb_size;
+    sps->min_pu_width  = sps->width  >> sps->log2_min_pu_size;
+    sps->min_pu_height = sps->height >> sps->log2_min_pu_size;
+    sps->tb_mask       = (1 << (sps->log2_ctb_size - sps->log2_min_tb_size)) - 1;
+
+    sps->qp_bd_offset = 6 * (sps->bit_depth - 8);
+
+    // coded dimensions must be a multiple of the minimum coding block size
+    if (av_mod_uintp2(sps->width, sps->log2_min_cb_size) ||
+        av_mod_uintp2(sps->height, sps->log2_min_cb_size)) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid coded frame dimensions.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sps->max_transform_hierarchy_depth_inter > sps->log2_ctb_size - sps->log2_min_tb_size) {
+        av_log(avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_inter out of range: %d\n",
+               sps->max_transform_hierarchy_depth_inter);
+        return AVERROR_INVALIDDATA;
+    }
+    if (sps->max_transform_hierarchy_depth_intra > sps->log2_ctb_size - sps->log2_min_tb_size) {
+        av_log(avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_intra out of range: %d\n",
+               sps->max_transform_hierarchy_depth_intra);
+        return AVERROR_INVALIDDATA;
+    }
+    if (sps->log2_max_trafo_size > FFMIN(sps->log2_ctb_size, 5)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "max transform block size out of range: %d\n",
+               sps->log2_max_trafo_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (get_bits_left(gb) < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Overread SPS by %d bits\n", -get_bits_left(gb));
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+/* Free callback registered for HEVCSPS objects: releases the buffer held in
+ * sps->data. The opaque argument is not used. */
+static void hevc_sps_free(FFRefStructOpaque opaque, void *obj)
+{
+    av_freep(&((HEVCSPS *)obj)->data);
+}
+
+/* Return non-zero when both SPS carry byte-identical data buffers of the
+ * same size, zero otherwise. */
+static int compare_sps(const HEVCSPS *sps1, const HEVCSPS *sps2)
+{
+    if (sps1->data_size != sps2->data_size)
+        return 0;
+
+    return memcmp(sps1->data, sps2->data, sps1->data_size) == 0;
+}
+
+/**
+ * Decode an SPS NAL unit and store the result in ps->sps_list.
+ *
+ * The bytes between gb->buffer and gb->buffer_end are duplicated into the new
+ * SPS so that a later SPS with the same id can be compared against it: an
+ * identical repeat keeps the already stored object, anything else replaces it
+ * via remove_sps().
+ *
+ * @return 0 on success, AVERROR(ENOMEM) or the error from ff_hevc_parse_sps()
+ */
+int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps, int apply_defdispwin)
+{
+    unsigned int sps_id;
+    int ret;
+    HEVCSPS *sps;
+
+    sps = ff_refstruct_alloc_ext(sizeof(*sps), 0, NULL, hevc_sps_free);
+    if (!sps)
+        return AVERROR(ENOMEM);
+
+    av_log(avctx, AV_LOG_DEBUG, "Decoding SPS\n");
+
+    // keep a private copy of the raw payload for later duplicate detection
+    sps->data_size = gb->buffer_end - gb->buffer;
+    sps->data      = av_memdup(gb->buffer, sps->data_size);
+    if (!sps->data) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ret = ff_hevc_parse_sps(sps, gb, &sps_id, apply_defdispwin,
+                            ps->vps_list, avctx);
+    if (ret < 0)
+        goto fail;
+
+    if (avctx->debug & FF_DEBUG_BITSTREAM) {
+        const HEVCWindow *win = &sps->output_window;
+
+        av_log(avctx, AV_LOG_DEBUG,
+               "Parsed SPS: id %d; coded wxh: %dx%d; "
+               "cropped wxh: %dx%d; pix_fmt: %s.\n",
+               sps_id, sps->width, sps->height,
+               sps->width - (win->left_offset + win->right_offset),
+               sps->height - (win->top_offset + win->bottom_offset),
+               av_get_pix_fmt_name(sps->pix_fmt));
+    }
+
+    if (!ps->sps_list[sps_id] ||
+        !compare_sps(ps->sps_list[sps_id], sps)) {
+        /* new or changed SPS: replace the stored entry through remove_sps() */
+        remove_sps(ps, sps_id);
+        ps->sps_list[sps_id] = sps;
+    } else {
+        /* exact repeat of an already parsed SPS: keep the original one */
+        ff_refstruct_unref(&sps);
+    }
+
+    return 0;
+
+fail:
+    ff_refstruct_unref(&sps);
+    return ret;
+}
+
+/**
+ * Free callback installed on refcounted HEVCPPS objects.
+ *
+ * Releases every heap buffer referenced by the PPS: the raw payload copy,
+ * the tile geometry arrays and the address conversion tables.
+ */
+static void hevc_pps_free(FFRefStructOpaque unused, void *obj)
+{
+    HEVCPPS *const p = obj;
+
+    /* raw payload copy */
+    av_freep(&p->data);
+
+    /* tile geometry */
+    av_freep(&p->column_width);
+    av_freep(&p->row_height);
+    av_freep(&p->col_bd);
+    av_freep(&p->row_bd);
+    av_freep(&p->col_idxX);
+    av_freep(&p->tile_pos_rs);
+    av_freep(&p->tile_id);
+
+    /* address conversion tables */
+    av_freep(&p->ctb_addr_rs_to_ts);
+    av_freep(&p->ctb_addr_ts_to_rs);
+    av_freep(&p->min_tb_addr_zs_tab);
+}
+
+/**
+ * Walk one colour_mapping_octants() syntax structure.
+ *
+ * While the split flag is set the function recurses into the eight
+ * sub-octants; otherwise it consumes the residual coefficients of the
+ * current octant. No value read here is stored in pps: the data is parsed
+ * only to advance the bit reader.
+ *
+ * @param inp_depth  current recursion depth, compared to pps->cm_octant_depth
+ * @param idx_y      octant index, only forwarded to the recursive calls
+ * @param idx_cb     octant index, only forwarded to the recursive calls
+ * @param idx_cr     octant index, only forwarded to the recursive calls
+ * @param inp_length octant edge length, halved on every split
+ */
+static void colour_mapping_octants(GetBitContext *gb, HEVCPPS *pps, int inp_depth,
+                                   int idx_y, int idx_cb, int idx_cr, int inp_length)
+{
+    const unsigned int y_parts = 1 << pps->cm_y_part_num_log2;
+    unsigned int split = 0;
+    int res_bits;
+
+    /* the split flag is only coded while below the maximum octant depth */
+    if (inp_depth < pps->cm_octant_depth)
+        split = get_bits1(gb);
+
+    if (split) {
+        const int half = inp_length / 2;
+
+        for (int k = 0; k < 2; k++)
+            for (int m = 0; m < 2; m++)
+                for (int n = 0; n < 2; n++)
+                    colour_mapping_octants(gb, pps, inp_depth + 1,
+                                           idx_y + y_parts * k * inp_length / 2,
+                                           idx_cb + m * half,
+                                           idx_cr + n * half,
+                                           half);
+        return;
+    }
+
+    /* width of the fixed-length part of each residual; it depends only on
+     * PPS fields, so it is the same for every coefficient of this octant */
+    res_bits = FFMAX(0, 10 + pps->luma_bit_depth_cm_input -
+                     pps->luma_bit_depth_cm_output -
+                     pps->cm_res_quant_bits - pps->cm_delta_flc_bits);
+
+    for (int part = 0; part < y_parts; part++) {
+        for (int vertex = 0; vertex < 4; vertex++) {
+            if (!get_bits1(gb)) // coded_res_flag
+                continue;
+
+            for (int comp = 0; comp < 3; comp++) {
+                unsigned int quot = get_ue_golomb_long(gb);              // res_coeff_q
+                unsigned int rem  = res_bits ? get_bits(gb, res_bits) : 0; // res_coeff_r
+
+                /* a sign bit follows every non-zero residual */
+                if (quot || rem)
+                    skip_bits1(gb);
+            }
+        }
+    }
+}
+
+/**
+ * Parse the colour_mapping_table() syntax structure of the PPS multilayer
+ * extension.
+ *
+ * The header fields are stored in pps; the octant data that follows is
+ * consumed by colour_mapping_octants() without being stored.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA if the number of reference
+ *         layers is out of range or if the coded bit depths would require
+ *         a residual width that the bit reader cannot handle
+ */
+static int colour_mapping_table(GetBitContext *gb, AVCodecContext *avctx, HEVCPPS *pps)
+{
+    int cm_res_bits;
+
+    pps->num_cm_ref_layers = get_ue_golomb(gb) + 1;
+    if (pps->num_cm_ref_layers > 62) {
+        av_log(avctx, AV_LOG_ERROR,
+               "num_cm_ref_layers_minus1 shall be in the range [0, 61].\n");
+        return AVERROR_INVALIDDATA;
+    }
+    for (int i = 0; i < pps->num_cm_ref_layers; i++)
+        pps->cm_ref_layer_id[i] = get_bits(gb, 6);
+
+    pps->cm_octant_depth = get_bits(gb, 2);
+    pps->cm_y_part_num_log2 = get_bits(gb, 2);
+
+    pps->luma_bit_depth_cm_input    = get_ue_golomb(gb) + 8;
+    pps->chroma_bit_depth_cm_input  = get_ue_golomb(gb) + 8;
+    pps->luma_bit_depth_cm_output   = get_ue_golomb(gb) + 8;
+    pps->chroma_bit_depth_cm_output = get_ue_golomb(gb) + 8;
+
+    pps->cm_res_quant_bits = get_bits(gb, 2);
+    pps->cm_delta_flc_bits = get_bits(gb, 2) + 1;
+
+    /* colour_mapping_octants() reads the fixed-length residual part with
+     * get_bits(), which handles at most 25 bits per call. The bit depths
+     * above come straight from the bitstream, so reject combinations that
+     * would exceed that limit. The expression mirrors the one evaluated in
+     * colour_mapping_octants(). */
+    cm_res_bits = FFMAX(0, 10 + pps->luma_bit_depth_cm_input -
+                        pps->luma_bit_depth_cm_output -
+                        pps->cm_res_quant_bits - pps->cm_delta_flc_bits);
+    if (cm_res_bits > 25) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Unsupported colour mapping residual width: %d bits\n",
+               cm_res_bits);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* the threshold deltas are only coded for an octant depth of exactly 1 */
+    if (pps->cm_octant_depth == 1) {
+        pps->cm_adapt_threshold_u_delta = get_se_golomb_long(gb);
+        pps->cm_adapt_threshold_v_delta = get_se_golomb_long(gb);
+    }
+
+    colour_mapping_octants(gb, pps, 0, 0, 0, 0, 1 << pps->cm_octant_depth);
+
+    return 0;
+}
+
+/**
+ * Parse the pps_multilayer_extension() syntax structure into pps.
+ *
+ * Reads the POC reset / scaling list inference flags, the per-layer
+ * reference location offsets and, when enabled, the colour mapping table.
+ *
+ * @param sps not used by this function
+ * @param vps supplies vps_max_layers, the upper bound for the number of
+ *            reference location offsets.
+ *            NOTE(review): dereferenced unconditionally, so it is assumed to
+ *            be non-NULL -- confirm against the caller.
+ * @return 0 on success, AVERROR_INVALIDDATA if num_ref_loc_offsets is too
+ *         large, or the error returned by colour_mapping_table()
+ */
+static int pps_multilayer_extension(GetBitContext *gb, AVCodecContext *avctx,
+                                    HEVCPPS *pps, const HEVCSPS *sps, const HEVCVPS *vps)
+{
+    pps->poc_reset_info_present_flag = get_bits1(gb);
+    pps->pps_infer_scaling_list_flag = get_bits1(gb);
+    if (pps->pps_infer_scaling_list_flag)
+        pps->pps_scaling_list_ref_layer_id = get_bits(gb, 6);
+
+    pps->num_ref_loc_offsets = get_ue_golomb(gb);
+    if (pps->num_ref_loc_offsets > vps->vps_max_layers - 1)
+        return AVERROR_INVALIDDATA;
+
+    for (int i = 0; i < pps->num_ref_loc_offsets; i++) {
+        // The presence flags are indexed by i, while the offsets and phases
+        // they announce are indexed by the 6-bit layer id read here.
+        pps->ref_loc_offset_layer_id[i] = get_bits(gb, 6);
+        pps->scaled_ref_layer_offset_present_flag[i] = get_bits1(gb);
+        if (pps->scaled_ref_layer_offset_present_flag[i]) {
+            pps->scaled_ref_layer_left_offset[pps->ref_loc_offset_layer_id[i]]   = get_se_golomb_long(gb);
+            pps->scaled_ref_layer_top_offset[pps->ref_loc_offset_layer_id[i]]    = get_se_golomb_long(gb);
+            pps->scaled_ref_layer_right_offset[pps->ref_loc_offset_layer_id[i]]  = get_se_golomb_long(gb);
+            pps->scaled_ref_layer_bottom_offset[pps->ref_loc_offset_layer_id[i]] = get_se_golomb_long(gb);
+        }
+
+        pps->ref_region_offset_present_flag[i] = get_bits1(gb);
+        if (pps->ref_region_offset_present_flag[i]) {
+            pps->ref_region_left_offset[pps->ref_loc_offset_layer_id[i]]   = get_se_golomb_long(gb);
+            pps->ref_region_top_offset[pps->ref_loc_offset_layer_id[i]]    = get_se_golomb_long(gb);
+            pps->ref_region_right_offset[pps->ref_loc_offset_layer_id[i]]  = get_se_golomb_long(gb);
+            pps->ref_region_bottom_offset[pps->ref_loc_offset_layer_id[i]] = get_se_golomb_long(gb);
+        }
+
+        pps->resample_phase_set_present_flag[i] = get_bits1(gb);
+        if (pps->resample_phase_set_present_flag[i]) {
+            pps->phase_hor_luma[pps->ref_loc_offset_layer_id[i]]   = get_ue_golomb_31(gb);
+            pps->phase_ver_luma[pps->ref_loc_offset_layer_id[i]]   = get_ue_golomb_31(gb);
+            // the chroma phases are stored with 8 subtracted from the coded value
+            pps->phase_hor_chroma[pps->ref_loc_offset_layer_id[i]] = get_ue_golomb(gb) - 8;
+            pps->phase_ver_chroma[pps->ref_loc_offset_layer_id[i]] = get_ue_golomb(gb) - 8;
+        }
+    }
+
+    pps->colour_mapping_enabled_flag = get_bits1(gb);
+    if (pps->colour_mapping_enabled_flag) {
+        int ret = colour_mapping_table(gb, avctx, pps);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * Consume one delta_dlt() syntax structure.
+ *
+ * All values are read only to advance the bit reader; nothing is stored
+ * in pps. The field width used for the counts is taken from
+ * pps->pps_bit_depth_for_depth_layers_minus8.
+ */
+static void delta_dlt(GetBitContext *gb, HEVCPPS *pps)
+{
+    unsigned int nb_vals, max_diff = 0;
+    int min_diff_minus1 = -1;
+
+    nb_vals = get_bits(gb, pps->pps_bit_depth_for_depth_layers_minus8 + 8);
+    if (!nb_vals)
+        return;
+
+    if (nb_vals > 1)
+        max_diff = get_bits(gb, pps->pps_bit_depth_for_depth_layers_minus8 + 8);
+
+    /* the minimum difference is coded with just enough bits for max_diff */
+    if (nb_vals > 2 && max_diff)
+        min_diff_minus1 = get_bits(gb, av_log2(max_diff) + 1);
+
+    if (max_diff > (min_diff_minus1 + 1)) {
+        /* every delta uses the same width, so derive it once */
+        const unsigned int delta_bits =
+            av_log2(max_diff - (min_diff_minus1 + 1)) + 1;
+
+        for (int k = 1; k < nb_vals; k++)
+            skip_bits(gb, delta_bits); // delta_val_diff_minus_min
+    }
+}
+
+/**
+ * Parse the pps_3d_extension() syntax structure.
+ *
+ * Only pps_bit_depth_for_depth_layers_minus8 is stored; the depth lookup
+ * table data itself is skipped. avctx and sps are not used.
+ *
+ * @return always 0
+ */
+static int pps_3d_extension(GetBitContext *gb, AVCodecContext *avctx,
+                            HEVCPPS *pps, const HEVCSPS *sps)
+{
+    unsigned int last_layer;
+
+    if (!get_bits1(gb)) // dlts_present_flag
+        return 0;
+
+    last_layer = get_bits(gb, 6); // pps_depth_layers_minus1
+    pps->pps_bit_depth_for_depth_layers_minus8 = get_bits(gb, 4);
+
+    for (int i = 0; i <= last_layer; i++) {
+        if (!get_bits1(gb)) // dlt_flag[i]
+            continue;
+        if (get_bits1(gb))  // dlt_pred_flag[i]
+            continue;
+
+        if (get_bits1(gb)) { // dlt_val_flags_present_flag[i]
+            /* one flag per representable depth value */
+            for (int j = 0; j < (1 << (pps->pps_bit_depth_for_depth_layers_minus8 + 8)); j++)
+                skip_bits1(gb); // dlt_value_flag[i][j]
+        } else {
+            delta_dlt(gb, pps);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse the pps_range_extension() syntax structure into pps.
+ *
+ * @param sps supplies the luma/chroma bit depths that bound the SAO offset
+ *            scale values
+ * @return 0 on success, AVERROR_INVALIDDATA if the chroma QP offset list is
+ *         too long or an SAO offset scale exceeds max(bit depth - 10, 0)
+ */
+static int pps_range_extensions(GetBitContext *gb, AVCodecContext *avctx,
+                                HEVCPPS *pps, const HEVCSPS *sps)
+{
+    /* the skip block size is only coded when transform skip is enabled */
+    if (pps->transform_skip_enabled_flag) {
+        pps->log2_max_transform_skip_block_size = get_ue_golomb_31(gb) + 2;
+    }
+    pps->cross_component_prediction_enabled_flag = get_bits1(gb);
+    pps->chroma_qp_offset_list_enabled_flag = get_bits1(gb);
+    if (pps->chroma_qp_offset_list_enabled_flag) {
+        pps->diff_cu_chroma_qp_offset_depth = get_ue_golomb_31(gb);
+        pps->chroma_qp_offset_list_len_minus1 = get_ue_golomb_31(gb);
+        if (pps->chroma_qp_offset_list_len_minus1 > 5) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "chroma_qp_offset_list_len_minus1 shall be in the range [0, 5].\n");
+            return AVERROR_INVALIDDATA;
+        }
+        for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
+            pps->cb_qp_offset_list[i] = get_se_golomb(gb);
+            if (pps->cb_qp_offset_list[i]) {
+                av_log(avctx, AV_LOG_WARNING,
+                       "cb_qp_offset_list not tested yet.\n");
+            }
+            pps->cr_qp_offset_list[i] = get_se_golomb(gb);
+            if (pps->cr_qp_offset_list[i]) {
+                /* name the list that actually triggered the warning */
+                av_log(avctx, AV_LOG_WARNING,
+                       "cr_qp_offset_list not tested yet.\n");
+            }
+        }
+    }
+    pps->log2_sao_offset_scale_luma = get_ue_golomb_31(gb);
+    pps->log2_sao_offset_scale_chroma = get_ue_golomb_31(gb);
+
+    if (   pps->log2_sao_offset_scale_luma   > FFMAX(sps->bit_depth        - 10, 0)
+        || pps->log2_sao_offset_scale_chroma > FFMAX(sps->bit_depth_chroma - 10, 0)
+    )
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
+/**
+ * Parse the pps_scc_extension() syntax structure into pps.
+ *
+ * @param sps supplies the luma/chroma bit depths that the palette predictor
+ *            entry bit depths must match
+ * @return 0 on success, AVERROR_INVALIDDATA if an ACT QP offset is outside
+ *         [-12, 12], if there are too many palette predictor initializers,
+ *         or if an entry bit depth differs from the SPS bit depth
+ */
+static int pps_scc_extension(GetBitContext *gb, AVCodecContext *avctx,
+                             HEVCPPS *pps, const HEVCSPS *sps)
+{
+    int num_comps, out_of_range;
+
+    pps->pps_curr_pic_ref_enabled_flag = get_bits1(gb);
+
+    pps->residual_adaptive_colour_transform_enabled_flag = get_bits1(gb);
+    if (pps->residual_adaptive_colour_transform_enabled_flag) {
+        pps->pps_slice_act_qp_offsets_present_flag = get_bits1(gb);
+        /* the offsets are stored with a bias of 5, 5 and 3 subtracted from
+         * the coded values */
+        pps->pps_act_y_qp_offset  = get_se_golomb(gb) - 5;
+        pps->pps_act_cb_qp_offset = get_se_golomb(gb) - 5;
+        pps->pps_act_cr_qp_offset = get_se_golomb(gb) - 3;
+
+        /* The allowed range is inclusive on both ends, as the error message
+         * states; only values strictly outside [-12, 12] are rejected. */
+#define CHECK_QP_OFFSET(name) (pps->pps_act_ ## name ## _qp_offset < -12 || \
+                               pps->pps_act_ ## name ## _qp_offset >  12)
+        out_of_range = CHECK_QP_OFFSET(y) || CHECK_QP_OFFSET(cb) || CHECK_QP_OFFSET(cr);
+#undef CHECK_QP_OFFSET
+        if (out_of_range) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "PpsActQpOffsetY/Cb/Cr shall be in the range of [-12, 12].\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    pps->pps_palette_predictor_initializers_present_flag = get_bits1(gb);
+    if (pps->pps_palette_predictor_initializers_present_flag) {
+        pps->pps_num_palette_predictor_initializers = get_ue_golomb(gb);
+        if (pps->pps_num_palette_predictor_initializers > 0) {
+            if (pps->pps_num_palette_predictor_initializers > HEVC_MAX_PALETTE_PREDICTOR_SIZE) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "pps_num_palette_predictor_initializers out of range: %u\n",
+                       pps->pps_num_palette_predictor_initializers);
+                return AVERROR_INVALIDDATA;
+            }
+            pps->monochrome_palette_flag = get_bits1(gb);
+            pps->luma_bit_depth_entry = get_ue_golomb_31(gb) + 8;
+            if (pps->luma_bit_depth_entry != sps->bit_depth)
+                return AVERROR_INVALIDDATA;
+            /* the chroma entry depth is only coded for non-monochrome palettes */
+            if (!pps->monochrome_palette_flag) {
+                pps->chroma_bit_depth_entry = get_ue_golomb_31(gb) + 8;
+                if (pps->chroma_bit_depth_entry != sps->bit_depth_chroma)
+                    return AVERROR_INVALIDDATA;
+            }
+
+            num_comps = pps->monochrome_palette_flag ? 1 : 3;
+            for (int comp = 0; comp < num_comps; comp++) {
+                /* component 0 uses the luma depth, the others the chroma depth */
+                int bit_depth = !comp ? pps->luma_bit_depth_entry : pps->chroma_bit_depth_entry;
+                for (int i = 0; i < pps->pps_num_palette_predictor_initializers; i++)
+                    pps->pps_palette_predictor_initializer[comp][i] = get_bits(gb, bit_depth);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Build the tables of a PPS that are derived from the coded parameters.
+ *
+ * Computes the tile column/row boundaries, the CTB raster-scan <-> tile-scan
+ * address conversion tables, the tile id of every CTB, the raster address
+ * of the first CTB of each tile and the z-scan address table for minimum
+ * transform blocks.
+ *
+ * avctx and gb are not used. On allocation failure the function returns
+ * without freeing what it already allocated; ff_hevc_decode_nal_pps()
+ * releases those buffers by unreferencing the PPS.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
+ */
+static inline int setup_pps(AVCodecContext *avctx, GetBitContext *gb,
+                            HEVCPPS *pps, const HEVCSPS *sps)
+{
+    int log2_diff;
+    int pic_area_in_ctbs;
+    int i, j, x, y, ctb_addr_rs, tile_id;
+
+    // Inferred parameters
+    pps->col_bd   = av_malloc_array(pps->num_tile_columns + 1, sizeof(*pps->col_bd));
+    pps->row_bd   = av_malloc_array(pps->num_tile_rows + 1,    sizeof(*pps->row_bd));
+    pps->col_idxX = av_malloc_array(sps->ctb_width,    sizeof(*pps->col_idxX));
+    if (!pps->col_bd || !pps->row_bd || !pps->col_idxX)
+        return AVERROR(ENOMEM);
+
+    if (pps->uniform_spacing_flag) {
+        // column_width/row_height may already have been allocated by the
+        // caller when tiles are enabled; allocate them only if missing.
+        if (!pps->column_width) {
+            pps->column_width = av_malloc_array(pps->num_tile_columns, sizeof(*pps->column_width));
+            pps->row_height   = av_malloc_array(pps->num_tile_rows,    sizeof(*pps->row_height));
+        }
+        if (!pps->column_width || !pps->row_height)
+            return AVERROR(ENOMEM);
+
+        // Spread the CTB columns/rows as evenly as integer division allows.
+        for (i = 0; i < pps->num_tile_columns; i++) {
+            pps->column_width[i] = ((i + 1) * sps->ctb_width) / pps->num_tile_columns -
+                                   (i * sps->ctb_width) / pps->num_tile_columns;
+        }
+
+        for (i = 0; i < pps->num_tile_rows; i++) {
+            pps->row_height[i] = ((i + 1) * sps->ctb_height) / pps->num_tile_rows -
+                                 (i * sps->ctb_height) / pps->num_tile_rows;
+        }
+    }
+
+    // Tile boundaries are the running sums of the tile widths/heights.
+    pps->col_bd[0] = 0;
+    for (i = 0; i < pps->num_tile_columns; i++)
+        pps->col_bd[i + 1] = pps->col_bd[i] + pps->column_width[i];
+
+    pps->row_bd[0] = 0;
+    for (i = 0; i < pps->num_tile_rows; i++)
+        pps->row_bd[i + 1] = pps->row_bd[i] + pps->row_height[i];
+
+    // j advances whenever the CTB column i has moved past boundary j.
+    for (i = 0, j = 0; i < sps->ctb_width; i++) {
+        if (i > pps->col_bd[j])
+            j++;
+        pps->col_idxX[i] = j;
+    }
+
+    /**
+     * 6.5
+     */
+    pic_area_in_ctbs     = sps->ctb_width    * sps->ctb_height;
+
+    pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_rs_to_ts));
+    pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_ts_to_rs));
+    pps->tile_id           = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->tile_id));
+    pps->min_tb_addr_zs_tab = av_malloc_array((sps->tb_mask+2) * (sps->tb_mask+2), sizeof(*pps->min_tb_addr_zs_tab));
+    if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs ||
+        !pps->tile_id || !pps->min_tb_addr_zs_tab) {
+        return AVERROR(ENOMEM);
+    }
+
+    // For every CTB in raster order, compute its address in tile scan order
+    // and record the mapping in both directions.
+    for (ctb_addr_rs = 0; ctb_addr_rs < pic_area_in_ctbs; ctb_addr_rs++) {
+        int tb_x   = ctb_addr_rs % sps->ctb_width;
+        int tb_y   = ctb_addr_rs / sps->ctb_width;
+        int tile_x = 0;
+        int tile_y = 0;
+        int val    = 0;
+
+        // locate the tile column and row containing this CTB
+        for (i = 0; i < pps->num_tile_columns; i++) {
+            if (tb_x < pps->col_bd[i + 1]) {
+                tile_x = i;
+                break;
+            }
+        }
+
+        for (i = 0; i < pps->num_tile_rows; i++) {
+            if (tb_y < pps->row_bd[i + 1]) {
+                tile_y = i;
+                break;
+            }
+        }
+
+        // CTBs of the tiles to the left within the same tile row ...
+        for (i = 0; i < tile_x; i++)
+            val += pps->row_height[tile_y] * pps->column_width[i];
+        // ... plus all CTBs of the complete tile rows above ...
+        for (i = 0; i < tile_y; i++)
+            val += sps->ctb_width * pps->row_height[i];
+
+        // ... plus the raster position of the CTB inside its own tile.
+        val += (tb_y - pps->row_bd[tile_y]) * pps->column_width[tile_x] +
+               tb_x - pps->col_bd[tile_x];
+
+        pps->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
+        pps->ctb_addr_ts_to_rs[val]         = ctb_addr_rs;
+    }
+
+    // tile_id is indexed by tile-scan address; tiles are numbered row by row.
+    for (j = 0, tile_id = 0; j < pps->num_tile_rows; j++)
+        for (i = 0; i < pps->num_tile_columns; i++, tile_id++)
+            for (y = pps->row_bd[j]; y < pps->row_bd[j + 1]; y++)
+                for (x = pps->col_bd[i]; x < pps->col_bd[i + 1]; x++)
+                    pps->tile_id[pps->ctb_addr_rs_to_ts[y * sps->ctb_width + x]] = tile_id;
+
+    // after the loops above tile_id holds the total number of tiles
+    pps->tile_pos_rs = av_malloc_array(tile_id, sizeof(*pps->tile_pos_rs));
+    if (!pps->tile_pos_rs)
+        return AVERROR(ENOMEM);
+
+    // raster-scan address of the top-left CTB of each tile
+    for (j = 0; j < pps->num_tile_rows; j++)
+        for (i = 0; i < pps->num_tile_columns; i++)
+            pps->tile_pos_rs[j * pps->num_tile_columns + i] =
+                pps->row_bd[j] * sps->ctb_width + pps->col_bd[i];
+
+    log2_diff = sps->log2_ctb_size - sps->log2_min_tb_size;
+    // min_tb_addr_zs points one row and one column into the table, so that
+    // index -1 in either direction lands on the -1 border written below.
+    pps->min_tb_addr_zs = &pps->min_tb_addr_zs_tab[1*(sps->tb_mask+2)+1];
+    for (y = 0; y < sps->tb_mask+2; y++) {
+        pps->min_tb_addr_zs_tab[y*(sps->tb_mask+2)] = -1;
+        pps->min_tb_addr_zs_tab[y]    = -1;
+    }
+    for (y = 0; y < sps->tb_mask+1; y++) {
+        for (x = 0; x < sps->tb_mask+1; x++) {
+            int tb_x = x >> log2_diff;
+            int tb_y = y >> log2_diff;
+            int rs   = sps->ctb_width * tb_y + tb_x;
+            int val  = pps->ctb_addr_rs_to_ts[rs] << (log2_diff * 2);
+            // interleave the low log2_diff bits of x and y: bit i of x goes
+            // to bit 2i of val, bit i of y to bit 2i + 1
+            for (i = 0; i < log2_diff; i++) {
+                int m = 1 << i;
+                val += (m & x ? m * m : 0) + (m & y ? 2 * m * m : 0);
+            }
+            pps->min_tb_addr_zs[y * (sps->tb_mask+2) + x] = val;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse a PPS NAL unit and store the result in ps->pps_list.
+ *
+ * If a PPS with the same id and a byte-identical payload is already stored,
+ * the function returns immediately and keeps the existing one. Otherwise a
+ * new PPS is parsed, its derived tables are built by setup_pps() and it
+ * replaces the previous entry for that id.
+ *
+ * @param gb    bit reader over the PPS payload
+ * @param avctx logging context
+ * @param ps    parameter-set storage; the SPS referenced by the PPS must
+ *              already be present in ps->sps_list
+ * @return 0 on success, a negative AVERROR code on failure. On failure the
+ *         partially parsed PPS is released and ps->pps_list is unchanged.
+ */
+int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps)
+{
+    const HEVCSPS *sps = NULL;
+    const HEVCVPS *vps = NULL;
+    int i, ret = 0;
+    ptrdiff_t nal_size = gb->buffer_end - gb->buffer;
+    unsigned int pps_id = get_ue_golomb_long(gb);
+    unsigned log2_parallel_merge_level_minus2;
+    HEVCPPS *pps;
+
+    av_log(avctx, AV_LOG_DEBUG, "Decoding PPS\n");
+
+    if (pps_id >= HEVC_MAX_PPS_COUNT) {
+        av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // A repeat of an already stored PPS: nothing to do.
+    if (ps->pps_list[pps_id]) {
+        const HEVCPPS *pps1 = ps->pps_list[pps_id];
+        if (pps1->data_size == nal_size &&
+            !memcmp(pps1->data, gb->buffer, pps1->data_size))
+            return 0;
+    }
+
+    pps = ff_refstruct_alloc_ext(sizeof(*pps), 0, NULL, hevc_pps_free);
+    if (!pps)
+        return AVERROR(ENOMEM);
+
+    // Keep the raw bytes around for the duplicate check above.
+    pps->data_size = nal_size;
+    pps->data = av_memdup(gb->buffer, nal_size);
+    if (!pps->data) {
+        // a failed copy is an allocation failure, not a bitstream error
+        ret = AVERROR(ENOMEM);
+        goto err;
+    }
+
+    // Default values
+    pps->loop_filter_across_tiles_enabled_flag = 1;
+    pps->num_tile_columns                      = 1;
+    pps->num_tile_rows                         = 1;
+    pps->uniform_spacing_flag                  = 1;
+    pps->disable_dbf                           = 0;
+    pps->beta_offset                           = 0;
+    pps->tc_offset                             = 0;
+    pps->log2_max_transform_skip_block_size    = 2;
+
+    // Coded parameters
+    pps->pps_id = pps_id;
+    pps->sps_id = get_ue_golomb_long(gb);
+    if (pps->sps_id >= HEVC_MAX_SPS_COUNT) {
+        av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", pps->sps_id);
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+    if (!ps->sps_list[pps->sps_id]) {
+        av_log(avctx, AV_LOG_ERROR, "SPS %u does not exist.\n", pps->sps_id);
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+    sps = ps->sps_list[pps->sps_id];
+    vps = ps->vps_list[sps->vps_id];
+
+    pps->dependent_slice_segments_enabled_flag = get_bits1(gb);
+    pps->output_flag_present_flag              = get_bits1(gb);
+    pps->num_extra_slice_header_bits           = get_bits(gb, 3);
+
+    pps->sign_data_hiding_flag = get_bits1(gb);
+
+    pps->cabac_init_present_flag = get_bits1(gb);
+
+    pps->num_ref_idx_l0_default_active = get_ue_golomb_31(gb) + 1;
+    pps->num_ref_idx_l1_default_active = get_ue_golomb_31(gb) + 1;
+    if (pps->num_ref_idx_l0_default_active >= HEVC_MAX_REFS ||
+        pps->num_ref_idx_l1_default_active >= HEVC_MAX_REFS) {
+        av_log(avctx, AV_LOG_ERROR, "Too many default refs in PPS: %d/%d.\n",
+               pps->num_ref_idx_l0_default_active, pps->num_ref_idx_l1_default_active);
+        // ret is still 0 here; without an explicit error code the function
+        // would report success after discarding the PPS
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+
+    pps->pic_init_qp_minus26 = get_se_golomb(gb);
+
+    pps->constrained_intra_pred_flag = get_bits1(gb);
+    pps->transform_skip_enabled_flag = get_bits1(gb);
+
+    pps->cu_qp_delta_enabled_flag = get_bits1(gb);
+    pps->diff_cu_qp_delta_depth   = 0;
+    if (pps->cu_qp_delta_enabled_flag)
+        pps->diff_cu_qp_delta_depth = get_ue_golomb_long(gb);
+
+    if (pps->diff_cu_qp_delta_depth < 0 ||
+        pps->diff_cu_qp_delta_depth > sps->log2_diff_max_min_coding_block_size) {
+        av_log(avctx, AV_LOG_ERROR, "diff_cu_qp_delta_depth %d is invalid\n",
+               pps->diff_cu_qp_delta_depth);
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+
+    pps->cb_qp_offset = get_se_golomb(gb);
+    if (pps->cb_qp_offset < -12 || pps->cb_qp_offset > 12) {
+        av_log(avctx, AV_LOG_ERROR, "pps_cb_qp_offset out of range: %d\n",
+               pps->cb_qp_offset);
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+    pps->cr_qp_offset = get_se_golomb(gb);
+    if (pps->cr_qp_offset < -12 || pps->cr_qp_offset > 12) {
+        av_log(avctx, AV_LOG_ERROR, "pps_cr_qp_offset out of range: %d\n",
+               pps->cr_qp_offset);
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+    pps->pic_slice_level_chroma_qp_offsets_present_flag = get_bits1(gb);
+
+    pps->weighted_pred_flag   = get_bits1(gb);
+    pps->weighted_bipred_flag = get_bits1(gb);
+
+    pps->transquant_bypass_enable_flag    = get_bits1(gb);
+    pps->tiles_enabled_flag               = get_bits1(gb);
+    pps->entropy_coding_sync_enabled_flag = get_bits1(gb);
+
+    if (pps->tiles_enabled_flag) {
+        int num_tile_columns_minus1 = get_ue_golomb(gb);
+        int num_tile_rows_minus1    = get_ue_golomb(gb);
+
+        // A negative value is an error code from get_ue_golomb() and is
+        // passed through; too many tiles is reported as invalid data.
+        if (num_tile_columns_minus1 < 0 ||
+            num_tile_columns_minus1 >= sps->ctb_width) {
+            av_log(avctx, AV_LOG_ERROR, "num_tile_columns_minus1 out of range: %d\n",
+                   num_tile_columns_minus1);
+            ret = num_tile_columns_minus1 < 0 ? num_tile_columns_minus1 : AVERROR_INVALIDDATA;
+            goto err;
+        }
+        if (num_tile_rows_minus1 < 0 ||
+            num_tile_rows_minus1 >= sps->ctb_height) {
+            av_log(avctx, AV_LOG_ERROR, "num_tile_rows_minus1 out of range: %d\n",
+                   num_tile_rows_minus1);
+            ret = num_tile_rows_minus1 < 0 ? num_tile_rows_minus1 : AVERROR_INVALIDDATA;
+            goto err;
+        }
+        pps->num_tile_columns = num_tile_columns_minus1 + 1;
+        pps->num_tile_rows    = num_tile_rows_minus1    + 1;
+
+        pps->column_width = av_malloc_array(pps->num_tile_columns, sizeof(*pps->column_width));
+        pps->row_height   = av_malloc_array(pps->num_tile_rows,    sizeof(*pps->row_height));
+        if (!pps->column_width || !pps->row_height) {
+            ret = AVERROR(ENOMEM);
+            goto err;
+        }
+
+        pps->uniform_spacing_flag = get_bits1(gb);
+        if (!pps->uniform_spacing_flag) {
+            // All but the last column width / row height are coded; the
+            // last one is whatever remains of the picture.
+            uint64_t sum = 0;
+            for (i = 0; i < pps->num_tile_columns - 1; i++) {
+                pps->column_width[i] = get_ue_golomb_long(gb) + 1;
+                sum                 += pps->column_width[i];
+            }
+            if (sum >= sps->ctb_width) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid tile widths.\n");
+                ret = AVERROR_INVALIDDATA;
+                goto err;
+            }
+            pps->column_width[pps->num_tile_columns - 1] = sps->ctb_width - sum;
+
+            sum = 0;
+            for (i = 0; i < pps->num_tile_rows - 1; i++) {
+                pps->row_height[i] = get_ue_golomb_long(gb) + 1;
+                sum               += pps->row_height[i];
+            }
+            if (sum >= sps->ctb_height) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid tile heights.\n");
+                ret = AVERROR_INVALIDDATA;
+                goto err;
+            }
+            pps->row_height[pps->num_tile_rows - 1] = sps->ctb_height - sum;
+        }
+        pps->loop_filter_across_tiles_enabled_flag = get_bits1(gb);
+    }
+
+    pps->seq_loop_filter_across_slices_enabled_flag = get_bits1(gb);
+
+    pps->deblocking_filter_control_present_flag = get_bits1(gb);
+    if (pps->deblocking_filter_control_present_flag) {
+        pps->deblocking_filter_override_enabled_flag = get_bits1(gb);
+        pps->disable_dbf                             = get_bits1(gb);
+        if (!pps->disable_dbf) {
+            int beta_offset_div2 = get_se_golomb(gb);
+            int tc_offset_div2   = get_se_golomb(gb) ;
+            if (beta_offset_div2 < -6 || beta_offset_div2 > 6) {
+                av_log(avctx, AV_LOG_ERROR, "pps_beta_offset_div2 out of range: %d\n",
+                       beta_offset_div2);
+                ret = AVERROR_INVALIDDATA;
+                goto err;
+            }
+            if (tc_offset_div2 < -6 || tc_offset_div2 > 6) {
+                av_log(avctx, AV_LOG_ERROR, "pps_tc_offset_div2 out of range: %d\n",
+                       tc_offset_div2);
+                ret = AVERROR_INVALIDDATA;
+                goto err;
+            }
+            pps->beta_offset = 2 * beta_offset_div2;
+            pps->tc_offset   = 2 *   tc_offset_div2;
+        }
+    }
+
+    pps->scaling_list_data_present_flag = get_bits1(gb);
+    if (pps->scaling_list_data_present_flag) {
+        set_default_scaling_list_data(&pps->scaling_list);
+        ret = scaling_list_data(gb, avctx, &pps->scaling_list, sps);
+        if (ret < 0)
+            goto err;
+    }
+    pps->lists_modification_present_flag = get_bits1(gb);
+    log2_parallel_merge_level_minus2     = get_ue_golomb_long(gb);
+    if (log2_parallel_merge_level_minus2 > sps->log2_ctb_size) {
+        av_log(avctx, AV_LOG_ERROR, "log2_parallel_merge_level_minus2 out of range: %d\n",
+               log2_parallel_merge_level_minus2);
+        ret = AVERROR_INVALIDDATA;
+        goto err;
+    }
+    pps->log2_parallel_merge_level       = log2_parallel_merge_level_minus2 + 2;
+
+    pps->slice_header_extension_present_flag = get_bits1(gb);
+
+    pps->pps_extension_present_flag = get_bits1(gb);
+    if (pps->pps_extension_present_flag) {
+        pps->pps_range_extensions_flag     = get_bits1(gb);
+        pps->pps_multilayer_extension_flag = get_bits1(gb);
+        pps->pps_3d_extension_flag         = get_bits1(gb);
+        pps->pps_scc_extension_flag        = get_bits1(gb);
+        skip_bits(gb, 4); // pps_extension_4bits
+
+        // The range extension is only parsed for RExt and higher profiles.
+        if (sps->ptl.general_ptl.profile_idc >= AV_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
+            if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
+                goto err;
+        }
+
+        if (pps->pps_multilayer_extension_flag) {
+            if ((ret = pps_multilayer_extension(gb, avctx, pps, sps, vps)) < 0)
+                goto err;
+        }
+
+        if (pps->pps_3d_extension_flag) {
+            if ((ret = pps_3d_extension(gb, avctx, pps, sps)) < 0)
+                goto err;
+        }
+
+        if (pps->pps_scc_extension_flag) {
+            if ((ret = pps_scc_extension(gb, avctx, pps, sps)) < 0)
+                goto err;
+        }
+    }
+
+    ret = setup_pps(avctx, gb, pps, sps);
+    if (ret < 0)
+        goto err;
+
+    // Unlike for the SPS, an overread is only warned about here.
+    if (get_bits_left(gb) < 0) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Overread PPS by %d bits\n", -get_bits_left(gb));
+    }
+
+    remove_pps(ps, pps_id);
+    ps->pps_list[pps_id] = pps;
+
+    return 0;
+
+err:
+    ff_refstruct_unref(&pps);
+    return ret;
+}
+
+/**
+ * Drop every parameter set held by ps.
+ *
+ * All VPS, SPS and PPS list entries are unreferenced, then the pointers to
+ * the active parameter sets are cleared.
+ */
+void ff_hevc_ps_uninit(HEVCParamSets *ps)
+{
+    int idx;
+
+    idx = 0;
+    while (idx < FF_ARRAY_ELEMS(ps->vps_list))
+        ff_refstruct_unref(&ps->vps_list[idx++]);
+
+    idx = 0;
+    while (idx < FF_ARRAY_ELEMS(ps->sps_list))
+        ff_refstruct_unref(&ps->sps_list[idx++]);
+
+    idx = 0;
+    while (idx < FF_ARRAY_ELEMS(ps->pps_list))
+        ff_refstruct_unref(&ps->pps_list[idx++]);
+
+    /* the active pointers no longer refer to valid entries */
+    ps->vps = NULL;
+    ps->sps = NULL;
+    ps->pps = NULL;
+}
+
+/**
+ * Compute a full picture order count from its coded LSBs.
+ *
+ * pocTid0 is split into an LSB part (modulo 1 << sps->log2_max_poc_lsb) and
+ * an MSB part. The MSB part is moved up or down by one LSB period when
+ * poc_lsb differs from the previous LSBs by half a period or more, and is
+ * forced to 0 for the BLA NAL unit types.
+ *
+ * @param sps           supplies log2_max_poc_lsb
+ * @param pocTid0       POC used as the prediction base
+ * @param poc_lsb       coded LSBs of the POC being derived
+ * @param nal_unit_type NAL unit type of the picture being derived
+ * @return the MSB part plus poc_lsb
+ */
+int ff_hevc_compute_poc(const HEVCSPS *sps, int pocTid0, int poc_lsb, int nal_unit_type)
+{
+    const int period  = 1 << sps->log2_max_poc_lsb;
+    const int ref_lsb = pocTid0 % period;
+    const int ref_msb = pocTid0 - ref_lsb;
+    int msb = ref_msb;
+
+    if (poc_lsb < ref_lsb && ref_lsb - poc_lsb >= period / 2) {
+        /* the LSBs wrapped around going forwards */
+        msb = ref_msb + period;
+    } else if (poc_lsb > ref_lsb && poc_lsb - ref_lsb > period / 2) {
+        /* the LSBs wrapped around going backwards */
+        msb = ref_msb - period;
+    }
+
+    // For BLA picture types, POCmsb is set to 0.
+    switch (nal_unit_type) {
+    case HEVC_NAL_BLA_W_LP:
+    case HEVC_NAL_BLA_W_RADL:
+    case HEVC_NAL_BLA_N_LP:
+        msb = 0;
+        break;
+    default:
+        break;
+    }
+
+    return msb + poc_lsb;
+}
diff --git a/libavcodec/hevc/ps.h b/libavcodec/hevc/ps.h
new file mode 100644
index 0000000000..99d70cefd2
--- /dev/null
+++ b/libavcodec/hevc/ps.h
@@ -0,0 +1,486 @@
+/*
+ * HEVC parameter set parsing
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVC_PS_H
+#define AVCODEC_HEVC_PS_H
+
+#include <stdint.h>
+
+#include "libavutil/pixfmt.h"
+#include "libavutil/rational.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/h2645_vui.h"
+
+#include "hevc.h"
+
+typedef struct HEVCSublayerHdrParams { /* HRD values of one sub-layer; HEVCHdrParams holds one per sub-layer for NAL and for VCL */
+    uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT];    // each array has room for HEVC_MAX_CPB_CNT entries
+    uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t cbr_flag; // NOTE(review): presumably a bitmask with one bit per CPB entry - confirm in the parser
+} HEVCSublayerHdrParams;
+
+// HRD flags in bitmask form (NOTE(review): presumably one bit per sub-layer - confirm in the parser)
+typedef struct HEVCHdrFlagParams {
+    uint8_t fixed_pic_rate_general_flag;
+    uint8_t fixed_pic_rate_within_cvs_flag;
+    uint8_t low_delay_hrd_flag;
+} HEVCHdrFlagParams;
+
+typedef struct HEVCHdrParams { /* HRD parameter block; stored by value in HEVCSPS and by pointer in HEVCVPS */
+    HEVCHdrFlagParams flags; // bitmask-form flags, see HEVCHdrFlagParams
+    uint8_t nal_hrd_parameters_present_flag;
+    uint8_t vcl_hrd_parameters_present_flag;
+    uint8_t sub_pic_hrd_params_present_flag;
+    uint8_t sub_pic_cpb_params_in_pic_timing_sei_flag;
+
+    uint8_t tick_divisor_minus2;
+    uint8_t du_cpb_removal_delay_increment_length_minus1;
+    uint8_t dpb_output_delay_du_length_minus1;
+    uint8_t bit_rate_scale;
+    uint8_t cpb_size_scale;
+    uint8_t cpb_size_du_scale;
+    uint8_t initial_cpb_removal_delay_length_minus1;
+    uint8_t au_cpb_removal_delay_length_minus1;
+    uint8_t dpb_output_delay_length_minus1;
+    uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS];                   // one entry per sub-layer
+    uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS]; // one entry per sub-layer
+
+    HEVCSublayerHdrParams nal_params[HEVC_MAX_SUB_LAYERS]; // NAL HRD values, one entry per sub-layer
+    HEVCSublayerHdrParams vcl_params[HEVC_MAX_SUB_LAYERS]; // VCL HRD values, one entry per sub-layer
+} HEVCHdrParams;
+
+typedef struct ShortTermRPS { /* one short-term reference picture set; HEVCSPS stores an array of these in st_rps[] */
+    int32_t delta_poc[32]; // room for 32 POC deltas
+    uint32_t used;         // NOTE(review): presumably a bitmask, bit i belonging to delta_poc[i] - confirm in the parser
+
+    uint8_t delta_idx;
+    uint8_t num_negative_pics;
+    uint8_t num_delta_pocs;
+    uint8_t rps_idx_num_delta_pocs;
+
+    uint16_t abs_delta_rps;
+    unsigned delta_rps_sign:1; // single-bit field
+
+    unsigned rps_predict:1;    // single-bit field
+    unsigned use_delta:1;      // single-bit field
+} ShortTermRPS;
+
+typedef struct HEVCWindow { /* four edge offsets; used for the SPS output/conformance windows and the VUI default display window */
+    unsigned int left_offset;
+    unsigned int right_offset;
+    unsigned int top_offset;
+    unsigned int bottom_offset;
+} HEVCWindow;
+
+typedef struct VUI { /* VUI data of an SPS (member vui of HEVCSPS) */
+    H2645VUI common; // NOTE(review): presumably the part shared with H.264, declared in h2645_vui.h - confirm
+
+    int neutra_chroma_indication_flag; // NOTE(review): name looks like a typo of "neutral"; renaming needs every user updated
+
+    int field_seq_flag;
+    int frame_field_info_present_flag;
+
+    int default_display_window_flag;
+    HEVCWindow def_disp_win; // default display window, see apply_defdispwin of ff_hevc_parse_sps()
+
+    int vui_timing_info_present_flag;
+    uint32_t vui_num_units_in_tick;
+    uint32_t vui_time_scale;
+    int vui_poc_proportional_to_timing_flag;
+    int vui_num_ticks_poc_diff_one_minus1;
+    int vui_hrd_parameters_present_flag;
+
+    int bitstream_restriction_flag;
+    int tiles_fixed_structure_flag;
+    int motion_vectors_over_pic_boundaries_flag;
+    int restricted_ref_pic_lists_flag;
+    int min_spatial_segmentation_idc;
+    int max_bytes_per_pic_denom;
+    int max_bits_per_min_cu_denom;
+    int log2_max_mv_length_horizontal;
+    int log2_max_mv_length_vertical;
+} VUI;
+
+typedef struct PTLCommon { /* profile/tier/level fields of one layer; PTL embeds it as general_ptl and sub_layer_ptl[] */
+    uint8_t profile_space;
+    uint8_t tier_flag;
+    uint8_t profile_idc;
+    uint8_t profile_compatibility_flag[32]; // one flag per array element
+    uint8_t progressive_source_flag;
+    uint8_t interlaced_source_flag;
+    uint8_t non_packed_constraint_flag;
+    uint8_t frame_only_constraint_flag;
+    uint8_t max_12bit_constraint_flag;
+    uint8_t max_10bit_constraint_flag;
+    uint8_t max_8bit_constraint_flag;
+    uint8_t max_422chroma_constraint_flag;
+    uint8_t max_420chroma_constraint_flag;
+    uint8_t max_monochrome_constraint_flag;
+    uint8_t intra_constraint_flag;
+    uint8_t one_picture_only_constraint_flag;
+    uint8_t lower_bit_rate_constraint_flag;
+    uint8_t max_14bit_constraint_flag;
+    uint8_t inbld_flag;
+    uint8_t level_idc;
+} PTLCommon;
+
+typedef struct PTL { /* complete profile/tier/level data: one general layer plus per-sub-layer entries */
+    PTLCommon general_ptl;
+    PTLCommon sub_layer_ptl[HEVC_MAX_SUB_LAYERS]; // one entry per sub-layer
+
+    uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS]; // per sub-layer: profile part of sub_layer_ptl[i] is present
+    uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];   // per sub-layer: level_idc of sub_layer_ptl[i] is present
+} PTL;
+
+typedef struct HEVCVPS { /* parsed video parameter set; kept in HEVCParamSets.vps_list[] */
+    unsigned int vps_id;
+
+    uint8_t vps_temporal_id_nesting_flag;
+    int vps_max_layers;     // ff_hevc_encode_nal_vps() codes this as vps_max_layers - 1
+    int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 1
+
+    PTL ptl;
+    int vps_sub_layer_ordering_info_present_flag;
+    unsigned int vps_max_dec_pic_buffering[HEVC_MAX_SUB_LAYERS]; // per sub-layer; the encoder writes value - 1
+    unsigned int vps_num_reorder_pics[HEVC_MAX_SUB_LAYERS];      // per sub-layer; the encoder writes the value as is
+    unsigned int vps_max_latency_increase[HEVC_MAX_SUB_LAYERS];  // per sub-layer; the encoder writes value + 1
+    int vps_max_layer_id;
+    int vps_num_layer_sets; ///< vps_num_layer_sets_minus1 + 1
+    uint8_t vps_timing_info_present_flag;
+    uint32_t vps_num_units_in_tick;
+    uint32_t vps_time_scale;
+    uint8_t vps_poc_proportional_to_timing_flag;
+    int vps_num_ticks_poc_diff_one; ///< vps_num_ticks_poc_diff_one_minus1 + 1
+    int vps_num_hrd_parameters;
+
+    HEVCHdrParams *hdr; // NOTE(review): presumably vps_num_hrd_parameters elements - confirm in the parser
+
+    uint8_t *data; // NOTE(review): presumably a copy of the raw VPS bytes, data_size long - confirm
+    int data_size;
+} HEVCVPS;
+
+typedef struct ScalingList { /* scaling list tables; present in both HEVCSPS and HEVCPPS */
+    /* This is a little wasteful, since sizeID 0 only needs 8 coeffs,
+     * and size ID 3 only has 2 arrays, not 6. */
+    uint8_t sl[4][6][64]; // indexed [size ID][array][coefficient], matching the note above
+    uint8_t sl_dc[2][6];  // NOTE(review): presumably DC values for the two largest size IDs - confirm in the parser
+} ScalingList;
+
+typedef struct HEVCSPS { /* parsed sequence parameter set; kept in HEVCParamSets.sps_list[] */
+    unsigned vps_id; // id of the VPS this SPS refers to
+    int chroma_format_idc;
+
+    HEVCWindow output_window; // ff_hevc_set_new_ref() copies these offsets into the frame's crop_* fields
+
+    HEVCWindow pic_conf_win;
+
+    HEVCHdrParams hdr;
+
+    int bit_depth;
+    int bit_depth_chroma;
+    int pixel_shift;
+    enum AVPixelFormat pix_fmt;
+
+    unsigned int log2_max_poc_lsb; // the POC LSB range is 1 << log2_max_poc_lsb, see ff_hevc_compute_poc()
+
+    int max_sub_layers;
+    struct {
+        int max_dec_pic_buffering;
+        int num_reorder_pics;
+        int max_latency_increase;
+    } temporal_layer[HEVC_MAX_SUB_LAYERS]; // per sub-layer; the DPB code reads entry max_sub_layers - 1
+
+    int vui_present;
+    VUI vui;
+    PTL ptl;
+
+    ScalingList scaling_list;
+
+    unsigned int nb_st_rps; // NOTE(review): presumably the number of valid st_rps[] entries - confirm
+    ShortTermRPS st_rps[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+
+    uint16_t lt_ref_pic_poc_lsb_sps[HEVC_MAX_LONG_TERM_REF_PICS];
+    uint32_t used_by_curr_pic_lt; // NOTE(review): presumably a bitmask, one bit per long-term entry - confirm
+    uint8_t num_long_term_ref_pics_sps;
+
+    struct {
+        uint8_t bit_depth;
+        uint8_t bit_depth_chroma;
+        unsigned int log2_min_pcm_cb_size;
+        unsigned int log2_max_pcm_cb_size;
+    } pcm;
+
+    unsigned int log2_min_cb_size;
+    unsigned int log2_diff_max_min_coding_block_size;
+    unsigned int log2_min_tb_size;
+    unsigned int log2_max_trafo_size;
+    unsigned int log2_ctb_size; // sample positions are shifted right by this to get CTB coordinates
+    unsigned int log2_min_pu_size;
+    unsigned int log2_diff_max_min_transform_block_size;
+
+    int max_transform_hierarchy_depth_inter;
+    int max_transform_hierarchy_depth_intra;
+
+    uint8_t separate_colour_plane;
+    uint8_t conformance_window;
+    uint8_t pcm_enabled;
+    uint8_t pcm_loop_filter_disabled;
+    uint8_t sublayer_ordering_info;
+    uint8_t temporal_id_nesting;
+    uint8_t extension_present;
+    uint8_t scaling_list_enabled;
+    uint8_t amp_enabled;
+    uint8_t sao_enabled;
+    uint8_t long_term_ref_pics_present;
+    uint8_t temporal_mvp_enabled;
+    uint8_t strong_intra_smoothing_enabled;
+    uint8_t range_extension;
+    uint8_t transform_skip_rotation_enabled;
+    uint8_t transform_skip_context_enabled;
+    uint8_t implicit_rdpcm_enabled;
+    uint8_t explicit_rdpcm_enabled;
+    uint8_t extended_precision_processing;
+    uint8_t intra_smoothing_disabled;
+    uint8_t high_precision_offsets_enabled;
+    uint8_t persistent_rice_adaptation_enabled;
+    uint8_t cabac_bypass_alignment_enabled;
+
+    uint8_t multilayer_extension;
+    uint8_t sps_3d_extension;
+
+    uint8_t scc_extension;
+    uint8_t curr_pic_ref_enabled;
+    uint8_t palette_mode_enabled;
+    uint8_t palette_predictor_initializers_present;
+    uint8_t intra_boundary_filtering_disabled;
+
+    int palette_max_size;
+    int delta_palette_max_predictor_size;
+    int sps_num_palette_predictor_initializers;
+    int sps_palette_predictor_initializer[3][HEVC_MAX_PALETTE_PREDICTOR_SIZE];
+    int motion_vector_resolution_control_idc;
+
+    /* coded frame dimensions in various units */
+    int width;
+    int height;
+    int ctb_width;  // picture width in CTBs; used as the row stride of raster-scan CTB addresses
+    int ctb_height; // ctb_width * ctb_height is the CTB count of a frame
+    int ctb_size;
+    int min_cb_width;
+    int min_cb_height;
+    int min_tb_width;
+    int min_tb_height;
+    int min_pu_width;
+    int min_pu_height;
+    int tb_mask;
+
+    int hshift[3]; // NOTE(review): presumably per-plane horizontal subsampling shifts - confirm
+    int vshift[3]; // NOTE(review): presumably per-plane vertical subsampling shifts - confirm
+
+    int qp_bd_offset;
+
+    uint8_t *data; // NOTE(review): presumably a copy of the raw SPS bytes, data_size long - confirm
+    int data_size;
+} HEVCSPS;
+
+typedef struct HEVCPPS { /* parsed picture parameter set; kept in HEVCParamSets.pps_list[] */
+    unsigned int pps_id;
+    unsigned int sps_id; ///< seq_parameter_set_id
+
+    uint8_t sign_data_hiding_flag;
+
+    uint8_t cabac_init_present_flag;
+
+    int num_ref_idx_l0_default_active; ///< num_ref_idx_l0_default_active_minus1 + 1
+    int num_ref_idx_l1_default_active; ///< num_ref_idx_l1_default_active_minus1 + 1
+    int pic_init_qp_minus26;
+
+    uint8_t constrained_intra_pred_flag;
+    uint8_t transform_skip_enabled_flag;
+
+    uint8_t cu_qp_delta_enabled_flag;
+    int diff_cu_qp_delta_depth;
+
+    int cb_qp_offset;
+    int cr_qp_offset;
+    uint8_t pic_slice_level_chroma_qp_offsets_present_flag;
+    uint8_t weighted_pred_flag;
+    uint8_t weighted_bipred_flag;
+    uint8_t output_flag_present_flag;
+    uint8_t transquant_bypass_enable_flag;
+
+    uint8_t dependent_slice_segments_enabled_flag;
+    uint8_t tiles_enabled_flag;
+    uint8_t entropy_coding_sync_enabled_flag;
+
+    uint16_t num_tile_columns;   ///< num_tile_columns_minus1 + 1
+    uint16_t num_tile_rows;      ///< num_tile_rows_minus1 + 1
+    uint8_t uniform_spacing_flag;
+    uint8_t loop_filter_across_tiles_enabled_flag;
+
+    uint8_t seq_loop_filter_across_slices_enabled_flag;
+
+    uint8_t deblocking_filter_control_present_flag;
+    uint8_t deblocking_filter_override_enabled_flag;
+    uint8_t disable_dbf;
+    int beta_offset;    ///< beta_offset_div2 * 2
+    int tc_offset;      ///< tc_offset_div2 * 2
+
+    uint8_t scaling_list_data_present_flag;
+    ScalingList scaling_list;
+
+    uint8_t lists_modification_present_flag;
+    int log2_parallel_merge_level; ///< log2_parallel_merge_level_minus2 + 2
+    int num_extra_slice_header_bits;
+    uint8_t slice_header_extension_present_flag;
+    uint8_t log2_max_transform_skip_block_size;
+    uint8_t pps_extension_present_flag;
+    uint8_t pps_range_extensions_flag;
+    uint8_t pps_multilayer_extension_flag;
+    uint8_t pps_3d_extension_flag;  // when set, the PPS parser calls pps_3d_extension()
+    uint8_t pps_scc_extension_flag; // when set, the PPS parser calls pps_scc_extension()
+    uint8_t cross_component_prediction_enabled_flag;
+    uint8_t chroma_qp_offset_list_enabled_flag;
+    uint8_t diff_cu_chroma_qp_offset_depth;
+    uint8_t chroma_qp_offset_list_len_minus1;
+    int8_t  cb_qp_offset_list[6];
+    int8_t  cr_qp_offset_list[6];
+    uint8_t log2_sao_offset_scale_luma;
+    uint8_t log2_sao_offset_scale_chroma;
+
+    // Multilayer extension parameters
+    uint8_t poc_reset_info_present_flag;
+    uint8_t pps_infer_scaling_list_flag;
+    uint8_t pps_scaling_list_ref_layer_id;
+    uint8_t num_ref_loc_offsets;
+    uint8_t ref_loc_offset_layer_id[64];
+    uint8_t scaled_ref_layer_offset_present_flag[64];
+    int16_t scaled_ref_layer_left_offset[64];
+    int16_t scaled_ref_layer_top_offset[64];
+    int16_t scaled_ref_layer_right_offset[64];
+    int16_t scaled_ref_layer_bottom_offset[64];
+    uint8_t ref_region_offset_present_flag[64];
+    int16_t ref_region_left_offset[64];
+    int16_t ref_region_top_offset[64];
+    int16_t ref_region_right_offset[64];
+    int16_t ref_region_bottom_offset[64];
+    uint8_t resample_phase_set_present_flag[64];
+    uint8_t phase_hor_luma[64];
+    uint8_t phase_ver_luma[64];
+    int8_t phase_hor_chroma[64];
+    int8_t phase_ver_chroma[64];
+    uint8_t colour_mapping_enabled_flag;
+    uint8_t num_cm_ref_layers;
+    uint8_t cm_ref_layer_id[62];
+    uint8_t cm_octant_depth;
+    uint8_t cm_y_part_num_log2;
+    uint8_t luma_bit_depth_cm_input;
+    uint8_t chroma_bit_depth_cm_input;
+    uint8_t luma_bit_depth_cm_output;
+    uint8_t chroma_bit_depth_cm_output;
+    uint8_t cm_res_quant_bits;
+    uint8_t cm_delta_flc_bits;
+    int8_t cm_adapt_threshold_u_delta;
+    int8_t cm_adapt_threshold_v_delta;
+
+    // 3D extension parameters
+    uint8_t pps_bit_depth_for_depth_layers_minus8;
+
+    // SCC extension parameters
+    uint8_t pps_curr_pic_ref_enabled_flag; // when set, ff_hevc_slice_rpl() also puts the current frame into the ref lists
+    uint8_t residual_adaptive_colour_transform_enabled_flag;
+    uint8_t pps_slice_act_qp_offsets_present_flag;
+    int8_t  pps_act_y_qp_offset;  // _plus5
+    int8_t  pps_act_cb_qp_offset; // _plus5
+    int8_t  pps_act_cr_qp_offset; // _plus3
+    uint8_t pps_palette_predictor_initializers_present_flag;
+    uint8_t pps_num_palette_predictor_initializers;
+    uint8_t monochrome_palette_flag;
+    uint8_t luma_bit_depth_entry;
+    uint8_t chroma_bit_depth_entry;
+    uint16_t pps_palette_predictor_initializer[3][HEVC_MAX_PALETTE_PREDICTOR_SIZE];
+
+    // Inferred parameters
+    unsigned int *column_width;  ///< ColumnWidth
+    unsigned int *row_height;    ///< RowHeight
+    unsigned int *col_bd;        ///< ColBd
+    unsigned int *row_bd;        ///< RowBd
+    int *col_idxX;
+
+    int *ctb_addr_rs_to_ts; ///< CtbAddrRSToTS
+    int *ctb_addr_ts_to_rs; ///< CtbAddrTSToRS
+    int *tile_id;           ///< TileId
+    int *tile_pos_rs;       ///< TilePosRS
+    int *min_tb_addr_zs;    ///< MinTbAddrZS
+    int *min_tb_addr_zs_tab;///< MinTbAddrZS
+
+    uint8_t *data; // NOTE(review): presumably a copy of the raw PPS bytes, data_size long - confirm
+    int data_size;
+} HEVCPPS;
+
+typedef struct HEVCParamSets { /* all stored parameter sets plus the active ones; emptied by ff_hevc_ps_uninit() */
+    const HEVCVPS *vps_list[HEVC_MAX_VPS_COUNT]; ///< RefStruct references
+    const HEVCSPS *sps_list[HEVC_MAX_SPS_COUNT]; ///< RefStruct references
+    const HEVCPPS *pps_list[HEVC_MAX_PPS_COUNT]; ///< RefStruct references, indexed by pps_id
+
+    /* currently active parameter sets */
+    const HEVCVPS *vps; // ff_hevc_ps_uninit() resets all three active pointers to NULL
+    const HEVCSPS *sps;
+    const HEVCPPS *pps;
+} HEVCParamSets;
+
+/**
+ * Parse the SPS from the bitstream into the provided HEVCSPS struct.
+ *
+ * @param sps_id the SPS id will be written here
+ * @param apply_defdispwin if set to 1, the default display window from the VUI
+ *                         will be applied to the video dimensions
+ * @param vps_list if non-NULL, this function will validate that the SPS refers
+ *                 to an existing VPS
+ */
+int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
+                      int apply_defdispwin, const HEVCVPS * const *vps_list,
+                      AVCodecContext *avctx);
+
+int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps);
+int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps, int apply_defdispwin);
+int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps); // returns 0 after storing the PPS in ps->pps_list[], negative on error
+
+void ff_hevc_ps_uninit(HEVCParamSets *ps); // unrefs every list entry and NULLs the active vps/sps/pps pointers
+
+int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
+                                  ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header);
+
+int ff_hevc_encode_nal_vps(HEVCVPS *vps, unsigned int id,
+                           uint8_t *buf, int buf_size); // returns put_bytes_output() of the written VPS or AVERROR_PATCHWELCOME
+
+/**
+ * Compute the POC of the current frame from poc_lsb and pocTid0 and return it (POCmsb is 0 for BLA NAL types).
+ */
+int ff_hevc_compute_poc(const HEVCSPS *sps, int pocTid0, int poc_lsb, int nal_unit_type);
+
+#endif /* AVCODEC_HEVC_PS_H */
diff --git a/libavcodec/hevc/ps_enc.c b/libavcodec/hevc/ps_enc.c
new file mode 100644
index 0000000000..7fbcb3ba4e
--- /dev/null
+++ b/libavcodec/hevc/ps_enc.c
@@ -0,0 +1,121 @@
+/*
+ * HEVC Parameter Set encoding
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "put_golomb.h"
+#include "ps.h"
+#include "put_bits.h"
+
+/* Emit the profile part of one PTL layer; level_idc is written by the caller. */
+static void write_ptl_layer(PutBitContext *pb, PTLCommon *ptl)
+{
+    put_bits(pb, 2, ptl->profile_space);
+    put_bits(pb, 1, ptl->tier_flag);
+    put_bits(pb, 5, ptl->profile_idc);
+    for (int bit = 0; bit < 32; bit++)
+        put_bits(pb, 1, ptl->profile_compatibility_flag[bit]);
+
+    put_bits(pb, 1, ptl->progressive_source_flag);
+    put_bits(pb, 1, ptl->interlaced_source_flag);
+    put_bits(pb, 1, ptl->non_packed_constraint_flag);
+    put_bits(pb, 1, ptl->frame_only_constraint_flag);
+    put_bits32(pb, 0);   // reserved: first 32 of the 44 zero bits
+    put_bits(pb, 12, 0); // reserved: remaining 12 zero bits
+}
+
+/* Write the general PTL layer followed by the presence flags and data of the sub-layers. */
+static void write_ptl(PutBitContext *pb, PTL *ptl, int max_num_sub_layers)
+{
+    const int nb_sub = max_num_sub_layers - 1;
+    write_ptl_layer(pb, &ptl->general_ptl);
+    put_bits(pb, 8, ptl->general_ptl.level_idc);
+
+    for (int n = 0; n < nb_sub; n++) {
+        put_bits(pb, 1, ptl->sub_layer_profile_present_flag[n]);
+        put_bits(pb, 1, ptl->sub_layer_level_present_flag[n]);
+    }
+
+    for (int n = nb_sub; nb_sub > 0 && n < 8; n++)
+        put_bits(pb, 2, 0); // reserved
+
+    for (int n = 0; n < nb_sub; n++) {
+        PTLCommon *sub = &ptl->sub_layer_ptl[n];
+        if (ptl->sub_layer_profile_present_flag[n])
+            write_ptl_layer(pb, sub);
+        if (ptl->sub_layer_level_present_flag[n])
+            put_bits(pb, 8, sub->level_idc);
+    }
+}
+
+/* Write vps with the given id into buf (buf_size bytes). Layer sets beyond the first and HRD
+ * parameters are unsupported. Returns the put_bytes_output() size or AVERROR_PATCHWELCOME. */
+int ff_hevc_encode_nal_vps(HEVCVPS *vps, unsigned int id,
+                           uint8_t *buf, int buf_size)
+{
+    PutBitContext pb;
+
+    init_put_bits(&pb, buf, buf_size);
+    put_bits(&pb,  4, id);
+    put_bits(&pb,  2, 3);                               // reserved
+    put_bits(&pb,  6, vps->vps_max_layers - 1);
+    put_bits(&pb,  3, vps->vps_max_sub_layers - 1);
+    put_bits(&pb,  1, vps->vps_temporal_id_nesting_flag);
+    put_bits(&pb, 16, 0xffff);                          // reserved
+
+    write_ptl(&pb, &vps->ptl, vps->vps_max_sub_layers);
+
+    put_bits(&pb, 1, vps->vps_sub_layer_ordering_info_present_flag);
+    // without ordering info only the last sub-layer is coded, so start at max_sub_layers - 1
+    for (int i = vps->vps_sub_layer_ordering_info_present_flag ? 0 : vps->vps_max_sub_layers - 1;
+         i < vps->vps_max_sub_layers; i++) {
+        set_ue_golomb(&pb, vps->vps_max_dec_pic_buffering[i] - 1);
+        set_ue_golomb(&pb, vps->vps_num_reorder_pics[i]);
+        set_ue_golomb(&pb, vps->vps_max_latency_increase[i] + 1);
+    }
+
+    put_bits(&pb, 6, vps->vps_max_layer_id);
+    set_ue_golomb(&pb, vps->vps_num_layer_sets - 1);
+
+    if (vps->vps_num_layer_sets > 1) {
+        avpriv_report_missing_feature(NULL, "Writing layer_id_included_flag");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    put_bits(&pb, 1, vps->vps_timing_info_present_flag);
+    if (vps->vps_timing_info_present_flag) {
+        put_bits32(&pb, vps->vps_num_units_in_tick);
+        put_bits32(&pb, vps->vps_time_scale);
+        put_bits(&pb, 1, vps->vps_poc_proportional_to_timing_flag);
+        if (vps->vps_poc_proportional_to_timing_flag)
+            set_ue_golomb(&pb, vps->vps_num_ticks_poc_diff_one - 1);
+
+        set_ue_golomb(&pb, vps->vps_num_hrd_parameters);
+        if (vps->vps_num_hrd_parameters) {
+            avpriv_report_missing_feature(NULL, "Writing HRD parameters");
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+
+    put_bits(&pb, 1, 0);    // extension flag
+
+    put_bits(&pb, 1, 1);    // stop bit
+    flush_put_bits(&pb);
+
+    return put_bytes_output(&pb);
+}
diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
new file mode 100644
index 0000000000..39ce70ca39
--- /dev/null
+++ b/libavcodec/hevc/refs.c
@@ -0,0 +1,552 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2012 - 2013 Gildas Cocherel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+
+#include "decode.h"
+#include "hevc.h"
+#include "hevcdec.h"
+#include "progressframe.h"
+#include "refstruct.h"
+
+/* Clear the given flag bits of frame; once no flag is left, release everything it holds. */
+void ff_hevc_unref_frame(HEVCFrame *frame, int flags)
+{
+    frame->flags &= ~flags;
+    if (frame->flags)
+        return; // some other flag still keeps the frame alive
+
+    ff_progress_frame_unref(&frame->tf);
+    av_frame_unref(frame->frame_grain);
+    frame->needs_fg = 0;
+
+    ff_refstruct_unref(&frame->tab_mvf);
+    ff_refstruct_unref(&frame->rpl);
+    frame->nb_rpl_elems = 0;
+    ff_refstruct_unref(&frame->rpl_tab);
+    frame->refPicList = NULL;
+    ff_refstruct_unref(&frame->hwaccel_picture_private);
+}
+
+/* Return the reference picture lists that frame ref uses for the CTB containing (x0, y0). */
+const RefPicList *ff_hevc_get_ref_list(const HEVCContext *s,
+                                       const HEVCFrame *ref, int x0, int y0)
+{
+    const int col_ctb = x0 >> s->ps.sps->log2_ctb_size;
+    const int row_ctb = y0 >> s->ps.sps->log2_ctb_size;
+    const int rs_addr = row_ctb * s->ps.sps->ctb_width + col_ctb; // raster-scan CTB address
+    return &ref->rpl_tab[s->ps.pps->ctb_addr_rs_to_ts[rs_addr]]->refPicList[0];
+}
+
+/* Strip the short-term and long-term reference flags from every DPB entry. */
+void ff_hevc_clear_refs(HEVCContext *s)
+{
+    const int ref_flags = HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF;
+
+    for (int n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++)
+        ff_hevc_unref_frame(&s->DPB[n], ref_flags);
+}
+
+/* Clear all flags of every DPB entry, which releases each frame completely. */
+void ff_hevc_flush_dpb(HEVCContext *s)
+{
+    for (HEVCFrame *cur = s->DPB; cur < s->DPB + FF_ARRAY_ELEMS(s->DPB); cur++)
+        ff_hevc_unref_frame(cur, ~0);
+}
+
+/* Set up the first DPB slot that holds no frame; returns NULL on failure or if none is free. */
+static HEVCFrame *alloc_frame(HEVCContext *s)
+{
+    HEVCFrame *frame = NULL;
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(s->DPB) && !frame; i++)
+        if (!s->DPB[i].f)
+            frame = &s->DPB[i];
+    if (!frame) {
+        av_log(s->avctx, AV_LOG_ERROR, "Error allocating frame, DPB full.\n");
+        return NULL;
+    }
+
+    if (ff_progress_frame_get_buffer(s->avctx, &frame->tf, AV_GET_BUFFER_FLAG_REF) < 0)
+        return NULL; // unlike the later failures, this path does not unref the frame
+
+    // one rpl element per NAL unit of the current packet
+    frame->rpl = ff_refstruct_allocz(s->pkt.nb_nals * sizeof(*frame->rpl));
+    if (!frame->rpl)
+        goto fail;
+    frame->nb_rpl_elems = s->pkt.nb_nals;
+
+    frame->tab_mvf = ff_refstruct_pool_get(s->tab_mvf_pool);
+    if (!frame->tab_mvf)
+        goto fail;
+
+    frame->rpl_tab = ff_refstruct_pool_get(s->rpl_tab_pool);
+    if (!frame->rpl_tab)
+        goto fail;
+    frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
+    // initially every CTB refers to the first rpl element
+    for (int ctb = 0; ctb < frame->ctb_count; ctb++)
+        frame->rpl_tab[ctb] = frame->rpl;
+
+    if (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD)
+        frame->f->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED;
+    else if (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD)
+        frame->f->flags |= AV_FRAME_FLAG_INTERLACED;
+
+    if (ff_hwaccel_frame_priv_alloc(s->avctx, &frame->hwaccel_picture_private) < 0)
+        goto fail;
+
+    return frame;
+fail:
+    ff_hevc_unref_frame(frame, ~0);
+    return NULL;
+}
+
+/* Make a newly allocated frame with the given POC the current one; a duplicate POC is an error. */
+int ff_hevc_set_new_ref(HEVCContext *s, int poc)
+{
+    HEVCFrame *cur;
+
+    /* a POC may appear only once within the sequence being decoded */
+    for (int n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
+        const HEVCFrame *other = &s->DPB[n];
+        if (!other->f || other->sequence != s->seq_decode || other->poc != poc)
+            continue;
+        av_log(s->avctx, AV_LOG_ERROR, "Duplicate POC in a sequence: %d.\n",
+               poc);
+        return AVERROR_INVALIDDATA;
+    }
+
+    cur = alloc_frame(s);
+    if (!cur)
+        return AVERROR(ENOMEM);
+
+    s->cur_frame      = cur;
+    s->collocated_ref = NULL;
+
+    /* always a short-term reference; marked for output only if the slice header asks for it */
+    cur->flags = HEVC_FRAME_FLAG_SHORT_REF;
+    if (s->sh.pic_output_flag)
+        cur->flags |= HEVC_FRAME_FLAG_OUTPUT;
+
+    cur->poc      = poc;
+    cur->sequence = s->seq_decode;
+
+    /* the SPS output window becomes the frame cropping */
+    cur->f->crop_left   = s->ps.sps->output_window.left_offset;
+    cur->f->crop_right  = s->ps.sps->output_window.right_offset;
+    cur->f->crop_top    = s->ps.sps->output_window.top_offset;
+    cur->f->crop_bottom = s->ps.sps->output_window.bottom_offset;
+
+    return 0;
+}
+
+/* Fully release every DPB entry whose sequence is HEVC_SEQUENCE_COUNTER_INVALID. */
+static void unref_missing_refs(HEVCContext *s)
+{
+    HEVCFrame *const end = s->DPB + FF_ARRAY_ELEMS(s->DPB);
+
+    for (HEVCFrame *cur = s->DPB; cur < end; cur++)
+        if (cur->sequence == HEVC_SEQUENCE_COUNTER_INVALID)
+            ff_hevc_unref_frame(cur, ~0);
+}
+
+/* Output the lowest-POC pending frame of the output sequence: 1 if out was filled, 0 if not, <0 on error. */
+int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
+{
+    if (IS_IRAP(s) && s->no_rasl_output_flag == 1) {
+        static const int mask = HEVC_FRAME_FLAG_BUMPING | HEVC_FRAME_FLAG_OUTPUT;
+        for (int i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+            HEVCFrame *frame = &s->DPB[i];
+            if ((frame->flags & mask) == HEVC_FRAME_FLAG_OUTPUT &&
+                frame->sequence != s->seq_decode) {
+                if (s->sh.no_output_of_prior_pics_flag == 1)
+                    ff_hevc_unref_frame(frame, HEVC_FRAME_FLAG_OUTPUT);
+                else
+                    frame->flags |= HEVC_FRAME_FLAG_BUMPING;
+            }
+        }
+    }
+    do {
+        int nb_output = 0;
+        int min_poc   = INT_MAX;
+        int i, min_idx = 0, ret; // min_idx is only meaningful when nb_output > 0
+
+        for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+            HEVCFrame *frame = &s->DPB[i];
+            if ((frame->flags & HEVC_FRAME_FLAG_OUTPUT) &&
+                frame->sequence == s->seq_output) {
+                nb_output++;
+                if (frame->poc < min_poc || nb_output == 1) {
+                    min_poc = frame->poc;
+                    min_idx = i;
+                }
+            }
+        }
+
+        /* wait for more frames before output */
+        if (!flush && s->seq_output == s->seq_decode && s->ps.sps &&
+            nb_output <= s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].num_reorder_pics)
+            return 0;
+
+        if (nb_output) {
+            HEVCFrame *frame = &s->DPB[min_idx];
+
+            ret = av_frame_ref(out, frame->needs_fg ? frame->frame_grain : frame->f);
+            /* copy props before the unref below, which may reset needs_fg and release f */
+            if (ret >= 0 && frame->needs_fg)
+                ret = av_frame_copy_props(out, frame->f);
+            if (frame->flags & HEVC_FRAME_FLAG_BUMPING)
+                ff_hevc_unref_frame(frame, HEVC_FRAME_FLAG_OUTPUT | HEVC_FRAME_FLAG_BUMPING);
+            else
+                ff_hevc_unref_frame(frame, HEVC_FRAME_FLAG_OUTPUT);
+            if (ret < 0)
+                return ret;
+
+            if (!(s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
+                av_frame_remove_side_data(out, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+
+            av_log(s->avctx, AV_LOG_DEBUG, "Output frame with POC %d.\n", frame->poc);
+            return 1;
+        }
+
+        if (s->seq_output != s->seq_decode)
+            s->seq_output = (s->seq_output + 1) & HEVC_SEQUENCE_COUNTER_MASK;
+        else
+            break;
+    } while (1);
+
+    return 0;
+}
+
+/*
+ * Count the frames of the output sequence (other than the current POC) that
+ * still hold any flag. Once that count reaches the SPS max_dec_pic_buffering
+ * of the last sub-layer entry, mark every output-pending frame whose POC does
+ * not exceed the lowest output-only POC with HEVC_FRAME_FLAG_BUMPING.
+ */
+void ff_hevc_bump_frame(HEVCContext *s)
+{
+    const HEVCSPS *sps = s->ps.sps;
+    int nb_held   = 0;
+    int poc_limit = INT_MAX;
+
+    /* frames of the output sequence, other than the current POC, holding any flag */
+    for (int n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
+        const HEVCFrame *fr = &s->DPB[n];
+        nb_held += fr->flags && fr->sequence == s->seq_output && fr->poc != s->poc;
+    }
+
+    if (!sps || nb_held < sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering)
+        return;
+
+    /* lowest POC among those frames whose only flag is OUTPUT */
+    for (int n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
+        const HEVCFrame *fr = &s->DPB[n];
+        if (fr->flags == HEVC_FRAME_FLAG_OUTPUT &&
+            fr->sequence == s->seq_output &&
+            fr->poc != s->poc && fr->poc < poc_limit)
+            poc_limit = fr->poc;
+    }
+
+    /* everything awaiting output at or below that POC gets bumped */
+    for (int n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
+        HEVCFrame *fr = &s->DPB[n];
+        if ((fr->flags & HEVC_FRAME_FLAG_OUTPUT) &&
+            fr->sequence == s->seq_output &&
+            fr->poc <= poc_limit)
+            fr->flags |= HEVC_FRAME_FLAG_BUMPING;
+    }
+}
+
+/* Point every CTB from the slice's first one (via ctb_addr_rs_to_ts) onwards at this slice's rpl element. */
+static int init_slice_rpl(HEVCContext *s)
+{
+    HEVCFrame *cur     = s->cur_frame;
+    const int nb_ctb   = cur->ctb_count;
+    const int first_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
+
+    if (s->slice_idx >= cur->nb_rpl_elems)
+        return AVERROR_INVALIDDATA;
+
+    for (int ctb = first_ts; ctb < nb_ctb; ctb++)
+        cur->rpl_tab[ctb] = cur->rpl + s->slice_idx;
+
+    /* NOTE(review): the cast presumably relies on refPicList being the first member of the tab - confirm */
+    cur->refPicList = (RefPicList *)cur->rpl_tab[first_ts];
+    return 0;
+}
+
+/**
+ * Build the final reference picture lists of the current slice from the
+ * frame-level candidate sets stored in s->rps.
+ *
+ * @param s decoder context; s->sh holds the parsed slice header
+ * @return 0 on success, a negative error code from init_slice_rpl(), or
+ *         AVERROR_INVALIDDATA when the RPS offers no usable reference or a
+ *         list modification entry points past the candidate list
+ */
+int ff_hevc_slice_rpl(HEVCContext *s)
+{
+    SliceHeader *sh = &s->sh;
+
+    /* B slices get two lists (L0 and L1), every other slice type only one */
+    uint8_t nb_list = sh->slice_type == HEVC_SLICE_B ? 2 : 1;
+    uint8_t list_idx;
+    int i, j, ret;
+
+    ret = init_slice_rpl(s);
+    if (ret < 0)
+        return ret;
+
+    /* Without any "current" reference and without current-picture
+     * referencing the concatenation loop below could never make progress. */
+    if (!(s->rps[ST_CURR_BEF].nb_refs + s->rps[ST_CURR_AFT].nb_refs +
+          s->rps[LT_CURR].nb_refs) && !s->ps.pps->pps_curr_pic_ref_enabled_flag) {
+        av_log(s->avctx, AV_LOG_ERROR, "Zero refs in the frame RPS.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (list_idx = 0; list_idx < nb_list; list_idx++) {
+        /* scratch list holding the concatenated candidates before the
+         * optional reordering */
+        RefPicList  rpl_tmp = { { 0 } };
+        RefPicList *rpl     = &s->cur_frame->refPicList[list_idx];
+
+        /* The order of the elements is
+         * ST_CURR_BEF - ST_CURR_AFT - LT_CURR for the L0 and
+         * ST_CURR_AFT - ST_CURR_BEF - LT_CURR for the L1 */
+        int cand_lists[3] = { list_idx ? ST_CURR_AFT : ST_CURR_BEF,
+                              list_idx ? ST_CURR_BEF : ST_CURR_AFT,
+                              LT_CURR };
+
+        /* concatenate the candidate lists for the current frame */
+        /* the candidate sets are appended repeatedly until at least the
+         * number of entries requested by the slice header is available;
+         * HEVC_MAX_REFS caps the scratch list */
+        while (rpl_tmp.nb_refs < sh->nb_refs[list_idx]) {
+            for (i = 0; i < FF_ARRAY_ELEMS(cand_lists); i++) {
+                RefPicList *rps = &s->rps[cand_lists[i]];
+                for (j = 0; j < rps->nb_refs && rpl_tmp.nb_refs < HEVC_MAX_REFS; j++) {
+                    rpl_tmp.list[rpl_tmp.nb_refs]       = rps->list[j];
+                    rpl_tmp.ref[rpl_tmp.nb_refs]        = rps->ref[j];
+                    /* only the third candidate set (LT_CURR) is long-term */
+                    rpl_tmp.isLongTerm[rpl_tmp.nb_refs] = i == 2;
+                    rpl_tmp.nb_refs++;
+                }
+            }
+            // Construct RefPicList0, RefPicList1 (8-8, 8-10)
+            /* with current-picture referencing the frame being decoded is
+             * appended as one more (long-term) candidate per round */
+            if (s->ps.pps->pps_curr_pic_ref_enabled_flag && rpl_tmp.nb_refs < HEVC_MAX_REFS) {
+                rpl_tmp.list[rpl_tmp.nb_refs]           = s->cur_frame->poc;
+                rpl_tmp.ref[rpl_tmp.nb_refs]            = s->cur_frame;
+                rpl_tmp.isLongTerm[rpl_tmp.nb_refs]     = 1;
+                rpl_tmp.nb_refs++;
+            }
+        }
+
+        /* reorder the references if necessary */
+        if (sh->rpl_modification_flag[list_idx]) {
+            /* every output entry is picked from the scratch list through the
+             * index signalled in the slice header */
+            for (i = 0; i < sh->nb_refs[list_idx]; i++) {
+                int idx = sh->list_entry_lx[list_idx][i];
+
+                if (idx >= rpl_tmp.nb_refs) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Invalid reference index.\n");
+                    return AVERROR_INVALIDDATA;
+                }
+
+                rpl->list[i]       = rpl_tmp.list[idx];
+                rpl->ref[i]        = rpl_tmp.ref[idx];
+                rpl->isLongTerm[i] = rpl_tmp.isLongTerm[idx];
+                /* NOTE(review): relies on rpl->nb_refs being 0 on entry;
+                 * the reset is not visible in this function - confirm */
+                rpl->nb_refs++;
+            }
+        } else {
+            /* no reordering: take the scratch list as is and truncate it to
+             * the number of active references */
+            memcpy(rpl, &rpl_tmp, sizeof(*rpl));
+            rpl->nb_refs = FFMIN(rpl->nb_refs, sh->nb_refs[list_idx]);
+        }
+
+        // 8-9
+        /* NOTE(review): this block runs for both lists but always indexes
+         * with sh->nb_refs[L0], and it leaves isLongTerm of the overwritten
+         * entry untouched - confirm this is intended for list_idx == 1 */
+        if (s->ps.pps->pps_curr_pic_ref_enabled_flag &&
+            !sh->rpl_modification_flag[list_idx] &&
+            rpl_tmp.nb_refs > sh->nb_refs[L0]) {
+            rpl->list[sh->nb_refs[L0] - 1] = s->cur_frame->poc;
+            rpl->ref[sh->nb_refs[L0] - 1]  = s->cur_frame;
+        }
+
+        /* remember the collocated picture when it lives in this list and its
+         * index is inside the list */
+        if (sh->collocated_list == list_idx &&
+            sh->collocated_ref_idx < rpl->nb_refs)
+            s->collocated_ref = rpl->ref[sh->collocated_ref_idx];
+    }
+
+    return 0;
+}
+
+/**
+ * Look up a DPB frame of the sequence being decoded by its POC.
+ *
+ * @param poc     POC to search for; only its LSBs when use_msb is 0
+ * @param use_msb non-zero to compare the complete POC, zero to compare only
+ *                the log2_max_poc_lsb low bits (the current picture is then
+ *                never returned)
+ * @return the matching frame, or NULL (an error is logged unless the current
+ *         NAL unit is a CRA or BLA picture)
+ */
+static HEVCFrame *find_ref_idx(HEVCContext *s, int poc, uint8_t use_msb)
+{
+    int poc_mask;
+    int idx;
+
+    if (use_msb)
+        poc_mask = ~0;
+    else
+        poc_mask = (1 << s->ps.sps->log2_max_poc_lsb) - 1;
+
+    for (idx = 0; idx < FF_ARRAY_ELEMS(s->DPB); idx++) {
+        HEVCFrame *cand = &s->DPB[idx];
+
+        /* skip empty slots and frames from another sequence */
+        if (!cand->f || cand->sequence != s->seq_decode)
+            continue;
+        if ((cand->poc & poc_mask) != poc)
+            continue;
+        /* an LSB-only match must not resolve to the current picture */
+        if (!use_msb && cand->poc == s->poc)
+            continue;
+
+        return cand;
+    }
+
+    if (s->nal_unit_type != HEVC_NAL_CRA_NUT && !IS_BLA(s))
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Could not find ref with POC %d\n", poc);
+    return NULL;
+}
+
+/**
+ * Replace the short-term/long-term reference flags of a frame with flag,
+ * leaving every other flag bit as it was.
+ */
+static void mark_ref(HEVCFrame *frame, int flag)
+{
+    const int ref_bits = HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF;
+
+    frame->flags = (frame->flags & ~ref_bits) | flag;
+}
+
+/**
+ * Allocate a stand-in frame for a reference picture that is not in the DPB.
+ *
+ * Unless a hwaccel is active, every plane is filled with the value
+ * 1 << (bit_depth - 1). The frame gets the requested POC, an invalid sequence
+ * counter and no flags.
+ *
+ * @param poc POC to assign to the generated frame
+ * @return the new frame, or NULL when alloc_frame() fails
+ */
+static HEVCFrame *generate_missing_ref(HEVCContext *s, int poc)
+{
+    HEVCFrame *frame;
+    int i, y;
+
+    frame = alloc_frame(s);
+    if (!frame)
+        return NULL;
+
+    if (!s->avctx->hwaccel) {
+        if (!s->ps.sps->pixel_shift) {
+            /* pixel_shift == 0: one memset per plane covers all rows
+             * (presumably one byte per sample - confirm) */
+            for (i = 0; frame->f->data[i]; i++)
+                memset(frame->f->data[i], 1 << (s->ps.sps->bit_depth - 1),
+                       frame->f->linesize[i] * AV_CEIL_RSHIFT(s->ps.sps->height, s->ps.sps->vshift[i]));
+        } else {
+            /* otherwise write one 16-bit sample at the start of each row and
+             * replicate it across the row with av_memcpy_backptr().
+             * NOTE(review): rows/columns are computed with a plain shift
+             * here but with AV_CEIL_RSHIFT above - confirm the dimensions
+             * are always multiples of the subsampling factor */
+            for (i = 0; frame->f->data[i]; i++)
+                for (y = 0; y < (s->ps.sps->height >> s->ps.sps->vshift[i]); y++) {
+                    uint8_t *dst = frame->f->data[i] + y * frame->f->linesize[i];
+                    AV_WN16(dst, 1 << (s->ps.sps->bit_depth - 1));
+                    av_memcpy_backptr(dst + 2, 2, 2*(s->ps.sps->width >> s->ps.sps->hshift[i]) - 2);
+                }
+        }
+    }
+
+    frame->poc      = poc;
+    /* the invalid sequence counter keeps find_ref_idx() from matching this
+     * frame against s->seq_decode (assuming seq_decode never takes that
+     * value - confirm) */
+    frame->sequence = HEVC_SEQUENCE_COUNTER_INVALID;
+    frame->flags    = 0;
+
+    /* report the frame as fully decoded so frame threads do not wait on it */
+    if (s->threads_type == FF_THREAD_FRAME)
+        ff_progress_frame_report(&frame->tf, INT_MAX);
+
+    return frame;
+}
+
+/**
+ * Append the frame with the given POC to a candidate list and tag it in the
+ * DPB with ref_flag. A frame that cannot be found is replaced by a generated
+ * stand-in.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA when the list is already full or
+ *         the POC resolves to the current frame, AVERROR(ENOMEM) when the
+ *         stand-in cannot be allocated
+ */
+static int add_candidate_ref(HEVCContext *s, RefPicList *list,
+                             int poc, int ref_flag, uint8_t use_msb)
+{
+    HEVCFrame *found = find_ref_idx(s, poc, use_msb);
+    int slot;
+
+    if (list->nb_refs >= HEVC_MAX_REFS || found == s->cur_frame)
+        return AVERROR_INVALIDDATA;
+
+    if (!found) {
+        found = generate_missing_ref(s, poc);
+        if (!found)
+            return AVERROR(ENOMEM);
+    }
+
+    slot             = list->nb_refs;
+    list->list[slot] = found->poc;
+    list->ref[slot]  = found;
+    list->nb_refs    = slot + 1;
+
+    mark_ref(found, ref_flag);
+    return 0;
+}
+
+/**
+ * Construct the frame-level reference picture sets (s->rps) for the current
+ * picture and update the reference flags of the DPB frames accordingly.
+ *
+ * All DPB frames except the current one first lose their short/long-term
+ * reference flags; every picture named by the slice header's short-term and
+ * long-term RPS is then added to the matching candidate set and flagged
+ * again. Finally ff_hevc_unref_frame() is called on every DPB slot, on the
+ * success path as well as on failure.
+ *
+ * @return 0 on success (also when the slice header carries no short-term
+ *         RPS, in which case only rps[0] and rps[1] are emptied), a negative
+ *         error code from add_candidate_ref() otherwise
+ */
+int ff_hevc_frame_rps(HEVCContext *s)
+{
+    const ShortTermRPS *short_rps = s->sh.short_term_rps;
+    const LongTermRPS  *long_rps  = &s->sh.long_term_rps;
+    RefPicList               *rps = s->rps;
+    int i, ret = 0;
+
+    if (!short_rps) {
+        rps[0].nb_refs = rps[1].nb_refs = 0;
+        return 0;
+    }
+
+    unref_missing_refs(s);
+
+    /* clear the reference flags on all frames except the current one */
+    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+        HEVCFrame *frame = &s->DPB[i];
+
+        if (frame == s->cur_frame)
+            continue;
+
+        mark_ref(frame, 0);
+    }
+
+    for (i = 0; i < NB_RPS_TYPE; i++)
+        rps[i].nb_refs = 0;
+
+    /* add the short refs */
+    for (i = 0; i < short_rps->num_delta_pocs; i++) {
+        int poc = s->poc + short_rps->delta_poc[i];
+        int list;
+
+        /* unsigned shift: 1 << 31 on a signed int would be undefined */
+        if (!(short_rps->used & (1U << i)))
+            list = ST_FOLL;
+        else if (i < short_rps->num_negative_pics)
+            list = ST_CURR_BEF;
+        else
+            list = ST_CURR_AFT;
+
+        ret = add_candidate_ref(s, &rps[list], poc, HEVC_FRAME_FLAG_SHORT_REF, 1);
+        if (ret < 0)
+            goto fail;
+    }
+
+    /* add the long refs */
+    for (i = 0; i < long_rps->nb_refs; i++) {
+        int poc  = long_rps->poc[i];
+        int list = long_rps->used[i] ? LT_CURR : LT_FOLL;
+
+        /* long-term POCs are matched on their LSBs only unless the MSBs
+         * were signalled */
+        ret = add_candidate_ref(s, &rps[list], poc, HEVC_FRAME_FLAG_LONG_REF, long_rps->poc_msb_present[i]);
+        if (ret < 0)
+            goto fail;
+    }
+
+fail:
+    /* release any frames that are now unused */
+    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++)
+        ff_hevc_unref_frame(&s->DPB[i], 0);
+
+    return ret;
+}
+
+/**
+ * Count the reference pictures the current picture may use: every short-term
+ * RPS entry whose "used" bit is set, every long-term entry marked as used,
+ * plus one when current-picture referencing is enabled in the PPS.
+ *
+ * @return the number of references
+ */
+int ff_hevc_frame_nb_refs(const HEVCContext *s)
+{
+    const ShortTermRPS *rps      = s->sh.short_term_rps;
+    const LongTermRPS  *long_rps = &s->sh.long_term_rps;
+    int ret = 0;
+    int i;
+
+    if (rps) {
+        /* Negative and positive pictures share one bitmask, so a single
+         * pass over all delta POCs is enough (assumes num_negative_pics
+         * <= num_delta_pocs, as ff_hevc_frame_rps() does as well).
+         * The shift is unsigned: 1 << 31 on a signed int is undefined. */
+        for (i = 0; i < rps->num_delta_pocs; i++)
+            ret += !!(rps->used & (1U << i));
+    }
+
+    /* long_rps is the address of a slice header member and thus never NULL */
+    for (i = 0; i < long_rps->nb_refs; i++)
+        ret += !!long_rps->used[i];
+
+    if (s->ps.pps->pps_curr_pic_ref_enabled_flag)
+        ret++;
+
+    return ret;
+}
diff --git a/libavcodec/hevc/sei.c b/libavcodec/hevc/sei.c
new file mode 100644
index 0000000000..e39ac0c38a
--- /dev/null
+++ b/libavcodec/hevc/sei.c
@@ -0,0 +1,243 @@
+/*
+ * HEVC Supplementary Enhancement Information messages
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2012 - 2013 Gildas Cocherel
+ * Copyright (C) 2013 Vittorio Giovara
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "bytestream.h"
+#include "golomb.h"
+#include "ps.h"
+#include "sei.h"
+
+/**
+ * Parse a decoded picture hash SEI payload.
+ *
+ * Only MD5 hashes (hash_type 0) are stored; for them is_md5 is set and three
+ * 16-byte digests are read, regardless of the chroma format. CRC (type 1)
+ * and checksum (type 2) payloads are left unread.
+ *
+ * @return always 0
+ */
+static int decode_nal_sei_decoded_picture_hash(HEVCSEIPictureHash *s,
+                                               GetByteContext *gb)
+{
+    const uint8_t hash_type = bytestream2_get_byte(gb);
+    int plane;
+
+    if (hash_type != 0)
+        return 0;
+
+    s->is_md5 = 1;
+    for (plane = 0; plane < 3; plane++)
+        bytestream2_get_buffer(gb, s->md5[plane], sizeof(s->md5[plane]));
+
+    return 0;
+}
+
+/**
+ * Parse the pic_struct field of a picture timing SEI and store the picture
+ * structure derived from it in s->picture_timing.
+ *
+ * The field is only read when the SPS selected by
+ * s->active_seq_parameter_set_id signals frame_field_info_present_flag;
+ * otherwise the stored value is left untouched.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) when the selected SPS is missing
+ *         (NOTE(review): the error code looks unrelated to memory - confirm)
+ */
+static int decode_nal_sei_pic_timing(HEVCSEI *s, GetBitContext *gb,
+                                     const HEVCParamSets *ps, void *logctx)
+{
+    const HEVCSPS *active_sps    = ps->sps_list[s->active_seq_parameter_set_id];
+    HEVCSEIPictureTiming *timing = &s->picture_timing;
+    int pic_struct;
+
+    if (!active_sps)
+        return(AVERROR(ENOMEM));
+
+    if (!active_sps->vui.frame_field_info_present_flag)
+        return 0;
+
+    pic_struct             = get_bits(gb, 4);
+    timing->picture_struct = AV_PICTURE_STRUCTURE_UNKNOWN;
+
+    switch (pic_struct) {
+    case 2:
+    case 10:
+    case 12:
+        av_log(logctx, AV_LOG_DEBUG, "BOTTOM Field\n");
+        timing->picture_struct = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
+        break;
+    case 1:
+    case 9:
+    case 11:
+        av_log(logctx, AV_LOG_DEBUG, "TOP Field\n");
+        timing->picture_struct = AV_PICTURE_STRUCTURE_TOP_FIELD;
+        break;
+    case 7:
+        av_log(logctx, AV_LOG_DEBUG, "Frame/Field Doubling\n");
+        timing->picture_struct = HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING;
+        break;
+    case 8:
+        av_log(logctx, AV_LOG_DEBUG, "Frame/Field Tripling\n");
+        timing->picture_struct = HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING;
+        break;
+    default:
+        /* every other value keeps AV_PICTURE_STRUCTURE_UNKNOWN */
+        break;
+    }
+
+    return 0;
+}
+
+/**
+ * Parse an active parameter sets SEI and remember the first signalled SPS id
+ * in s->active_seq_parameter_set_id.
+ *
+ * Only the first active_seq_parameter_set_id is read; any further ids
+ * announced by num_sps_ids_minus1 are not parsed.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA when num_sps_ids_minus1 exceeds
+ *         15 or the SPS id is not below HEVC_MAX_SPS_COUNT
+ */
+static int decode_nal_sei_active_parameter_sets(HEVCSEI *s, GetBitContext *gb, void *logctx)
+{
+    int num_sps_ids_minus1;
+    unsigned active_seq_parameter_set_id;
+
+    get_bits(gb, 4); // active_video_parameter_set_id
+    get_bits(gb, 1); // self_contained_cvs_flag
+    get_bits(gb, 1); // no_parameter_set_update_flag
+    num_sps_ids_minus1 = get_ue_golomb_long(gb); // num_sps_ids_minus1
+
+    if (num_sps_ids_minus1 < 0 || num_sps_ids_minus1 > 15) {
+        av_log(logctx, AV_LOG_ERROR, "num_sps_ids_minus1 %d invalid\n", num_sps_ids_minus1);
+        return AVERROR_INVALIDDATA;
+    }
+
+    active_seq_parameter_set_id = get_ue_golomb_long(gb);
+    if (active_seq_parameter_set_id >= HEVC_MAX_SPS_COUNT) {
+        /* the id is unsigned, so it is printed with %u */
+        av_log(logctx, AV_LOG_ERROR, "active_parameter_set_id %u invalid\n", active_seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    s->active_seq_parameter_set_id = active_seq_parameter_set_id;
+
+    return 0;
+}
+
+/**
+ * Parse a time code SEI into s and mark it as present.
+ *
+ * Up to three clock timestamps are read (num_clock_ts is a 2-bit field).
+ * Seconds and minutes are clipped to 0..59 and hours to 0..23 after reading.
+ *
+ * @return always 0
+ */
+static int decode_nal_sei_timecode(HEVCSEITimeCode *s, GetBitContext *gb)
+{
+    s->num_clock_ts = get_bits(gb, 2);
+
+    for (int i = 0; i < s->num_clock_ts; i++) {
+        s->clock_timestamp_flag[i] =  get_bits(gb, 1);
+
+        /* all remaining fields of entry i are only coded when its flag is set */
+        if (s->clock_timestamp_flag[i]) {
+            s->units_field_based_flag[i] = get_bits(gb, 1);
+            s->counting_type[i]          = get_bits(gb, 5);
+            s->full_timestamp_flag[i]    = get_bits(gb, 1);
+            s->discontinuity_flag[i]     = get_bits(gb, 1);
+            s->cnt_dropped_flag[i]       = get_bits(gb, 1);
+
+            s->n_frames[i]               = get_bits(gb, 9);
+
+            if (s->full_timestamp_flag[i]) {
+                /* full timestamp: seconds, minutes and hours are all coded */
+                s->seconds_value[i]      = av_clip(get_bits(gb, 6), 0, 59);
+                s->minutes_value[i]      = av_clip(get_bits(gb, 6), 0, 59);
+                s->hours_value[i]        = av_clip(get_bits(gb, 5), 0, 23);
+            } else {
+                /* partial timestamp: each unit is preceded by a presence
+                 * flag, and a coarser unit is only coded when the finer one
+                 * was; values that are not coded are left untouched */
+                s->seconds_flag[i] = get_bits(gb, 1);
+                if (s->seconds_flag[i]) {
+                    s->seconds_value[i] = av_clip(get_bits(gb, 6), 0, 59);
+                    s->minutes_flag[i]  = get_bits(gb, 1);
+                    if (s->minutes_flag[i]) {
+                        s->minutes_value[i] = av_clip(get_bits(gb, 6), 0, 59);
+                        s->hours_flag[i] =  get_bits(gb, 1);
+                        if (s->hours_flag[i]) {
+                            s->hours_value[i] = av_clip(get_bits(gb, 5), 0, 23);
+                        }
+                    }
+                }
+            }
+
+            /* NOTE(review): the offset is read as an unsigned bit field and
+             * stored in an int32_t; no sign extension is done here - confirm
+             * against the signedness the specification gives this field */
+            s->time_offset_length[i] = get_bits(gb, 5);
+            if (s->time_offset_length[i] > 0) {
+                s->time_offset_value[i] = get_bits_long(gb, s->time_offset_length[i]);
+            }
+        }
+    }
+
+    s->present = 1;
+    return 0;
+}
+
+/**
+ * Dispatch one prefix SEI payload to its parser.
+ *
+ * Payload types without an HEVC-specific parser are handed to the shared
+ * H.264/H.265 SEI decoder; a debug message is logged when that decoder does
+ * not handle the type either.
+ *
+ * @param gb    bit reader positioned on the payload
+ * @param gbyte byte reader positioned on the same payload
+ * @param type  SEI payload type
+ * @return the return value of the selected parser
+ */
+static int decode_nal_sei_prefix(GetBitContext *gb, GetByteContext *gbyte,
+                                 void *logctx, HEVCSEI *s,
+                                 const HEVCParamSets *ps, int type)
+{
+    int ret;
+
+    if (type == 256)  // Mismatched value from HM 8.1
+        return decode_nal_sei_decoded_picture_hash(&s->picture_hash, gbyte);
+    if (type == SEI_TYPE_PIC_TIMING)
+        return decode_nal_sei_pic_timing(s, gb, ps, logctx);
+    if (type == SEI_TYPE_ACTIVE_PARAMETER_SETS)
+        return decode_nal_sei_active_parameter_sets(s, gb, logctx);
+    if (type == SEI_TYPE_TIME_CODE)
+        return decode_nal_sei_timecode(&s->timecode, gb);
+
+    ret = ff_h2645_sei_message_decode(&s->common, type, AV_CODEC_ID_HEVC,
+                                      gb, gbyte, logctx);
+    if (ret == FF_H2645_SEI_MESSAGE_UNHANDLED)
+        av_log(logctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", type);
+    return ret;
+}
+
+/**
+ * Dispatch one suffix SEI payload. Only the decoded picture hash is parsed;
+ * every other payload type is skipped with a debug message.
+ *
+ * @return the picture hash parser's return value, or 0 for skipped types
+ */
+static int decode_nal_sei_suffix(GetBitContext *gb, GetByteContext *gbyte,
+                                 void *logctx, HEVCSEI *s, int type)
+{
+    if (type == SEI_TYPE_DECODED_PICTURE_HASH)
+        return decode_nal_sei_decoded_picture_hash(&s->picture_hash, gbyte);
+
+    av_log(logctx, AV_LOG_DEBUG, "Skipped SUFFIX SEI %d\n", type);
+    return 0;
+}
+
+/**
+ * Parse one SEI message: read its payload type and size, then hand the
+ * payload to the prefix or suffix dispatcher.
+ *
+ * The outer reader gb is always advanced past the whole payload, however
+ * much of it the payload parser consumes.
+ *
+ * @param gb            byte reader positioned at the start of the message
+ * @param nal_unit_type HEVC_NAL_SEI_PREFIX selects the prefix dispatcher,
+ *                      any other value the suffix dispatcher
+ * @return the dispatcher's return value, or AVERROR_INVALIDDATA when the
+ *         header or payload does not fit into the remaining data
+ */
+static int decode_nal_sei_message(GetByteContext *gb, void *logctx, HEVCSEI *s,
+                                  const HEVCParamSets *ps, int nal_unit_type)
+{
+    GetByteContext message_gbyte;
+    GetBitContext message_gb;
+    int payload_type = 0;
+    int payload_size = 0;
+    int byte = 0xFF;
+    /* ret is only consumed by av_assert1() below, which is presumably
+     * compiled out in some builds - hence av_unused */
+    av_unused int ret;
+    av_log(logctx, AV_LOG_DEBUG, "Decoding SEI\n");
+
+    /* The payload type is the sum of a run of bytes: every 0xFF byte adds
+     * 255 and continues, the first other byte ends the run. At least two
+     * bytes must remain, and the sum must not overflow an int. */
+    while (byte == 0xFF) {
+        if (bytestream2_get_bytes_left(gb) < 2 || payload_type > INT_MAX - 255)
+            return AVERROR_INVALIDDATA;
+        byte          = bytestream2_get_byteu(gb);
+        payload_type += byte;
+    }
+    byte = 0xFF;
+    /* The payload size uses the same coding. Requiring the size gathered so
+     * far (plus the byte about to be read) to fit into the remaining data
+     * also keeps the sum bounded. */
+    while (byte == 0xFF) {
+        if (bytestream2_get_bytes_left(gb) < 1 + payload_size)
+            return AVERROR_INVALIDDATA;
+        byte          = bytestream2_get_byteu(gb);
+        payload_size += byte;
+    }
+    if (bytestream2_get_bytes_left(gb) < payload_size)
+        return AVERROR_INVALIDDATA;
+    /* give the payload parser a byte reader and a bit reader over exactly
+     * the payload bytes */
+    bytestream2_init(&message_gbyte, gb->buffer, payload_size);
+    ret = init_get_bits8(&message_gb, gb->buffer, payload_size);
+    av_assert1(ret >= 0);
+    bytestream2_skipu(gb, payload_size);
+    if (nal_unit_type == HEVC_NAL_SEI_PREFIX) {
+        return decode_nal_sei_prefix(&message_gb, &message_gbyte,
+                                     logctx, s, ps, payload_type);
+    } else { /* nal_unit_type == NAL_SEI_SUFFIX */
+        return decode_nal_sei_suffix(&message_gb, &message_gbyte,
+                                     logctx, s, payload_type);
+    }
+}
+
+/**
+ * Decode every SEI message contained in an SEI NAL unit.
+ *
+ * The bit reader must be byte aligned; parsing starts at its current
+ * position and continues until no whole byte is left. At least one message
+ * is always attempted.
+ *
+ * @return 1 when all messages were parsed, otherwise the negative error code
+ *         of the first failing message
+ */
+int ff_hevc_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEI *s,
+                           const HEVCParamSets *ps, enum HEVCNALUnitType type)
+{
+    GetByteContext gbyte;
+    const int bit_pos = get_bits_count(gb);
+
+    av_assert1((bit_pos % 8) == 0);
+    bytestream2_init(&gbyte, gb->buffer + bit_pos / 8,
+                     get_bits_left(gb) / 8);
+
+    for (;;) {
+        int ret = decode_nal_sei_message(&gbyte, logctx, s, ps, type);
+
+        if (ret < 0)
+            return ret;
+        if (bytestream2_get_bytes_left(&gbyte) <= 0)
+            break;
+    }
+
+    return 1;
+}
diff --git a/libavcodec/hevc/sei.h b/libavcodec/hevc/sei.h
new file mode 100644
index 0000000000..c97d22d423
--- /dev/null
+++ b/libavcodec/hevc/sei.h
@@ -0,0 +1,112 @@
+/*
+ * HEVC Supplementary Enhancement Information messages
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVC_SEI_H
+#define AVCODEC_HEVC_SEI_H
+
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+
+#include "libavcodec/get_bits.h"
+#include "libavcodec/h2645_sei.h"
+#include "libavcodec/sei.h"
+
+#include "hevc.h"
+
+
+/**
+ * Picture structure values stored in HEVCSEIPictureTiming.picture_struct for
+ * frame doubling and tripling. The numbers equal the pic_struct codes (7 and
+ * 8) that the picture timing SEI parser maps to them.
+ */
+typedef enum {
+        HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING = 7,
+        HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING = 8
+} HEVC_SEI_PicStructType;
+
+/**
+ * Decoded picture hash SEI contents.
+ */
+typedef struct HEVCSEIPictureHash {
+    /* three 16-byte MD5 digests, read in order from the SEI payload */
+    uint8_t       md5[3][16];
+    /* set to 1 once an MD5 hash has been parsed; the parser never clears it */
+    uint8_t is_md5;
+} HEVCSEIPictureHash;
+
+/**
+ * Frame packing arrangement SEI fields.
+ *
+ * NOTE(review): HEVCSEI has no member of this type and sei.c does not refer
+ * to it; presumably the shared H2645SEI state carries frame packing now -
+ * confirm whether this struct is still needed.
+ */
+typedef struct HEVCSEIFramePacking {
+    int present;
+    int arrangement_type;
+    int content_interpretation_type;
+    int quincunx_subsampling;
+    int current_frame_is_frame0_flag;
+} HEVCSEIFramePacking;
+
+/**
+ * Picture timing SEI contents.
+ */
+typedef struct HEVCSEIPictureTiming {
+    /* one of the AV_PICTURE_STRUCTURE_* values or a HEVC_SEI_PicStructType
+     * value, as set by the picture timing SEI parser */
+    int picture_struct;
+} HEVCSEIPictureTiming;
+
+/**
+ * Alternative transfer characteristics SEI fields.
+ *
+ * NOTE(review): HEVCSEI has no member of this type and sei.c does not refer
+ * to it; presumably the shared H2645SEI state carries this information now -
+ * confirm whether this struct is still needed.
+ */
+typedef struct HEVCSEIAlternativeTransfer {
+    int present;
+    int preferred_transfer_characteristics;
+} HEVCSEIAlternativeTransfer;
+
+/**
+ * Time code SEI contents. The arrays hold one entry per clock timestamp;
+ * num_clock_ts is parsed from a 2-bit field, so at most three are in use.
+ */
+typedef struct HEVCSEITimeCode {
+    /* set to 1 after a time code SEI has been parsed */
+    int      present;
+    /* number of clock timestamps parsed (0..3) */
+    uint8_t  num_clock_ts;
+    /* when 0 for an entry, none of its other fields are parsed */
+    uint8_t  clock_timestamp_flag[3];
+    uint8_t  units_field_based_flag[3];
+    /* 5-bit field */
+    uint8_t  counting_type[3];
+    /* 1: seconds, minutes and hours are all coded; 0: each is optional */
+    uint8_t  full_timestamp_flag[3];
+    uint8_t  discontinuity_flag[3];
+    uint8_t  cnt_dropped_flag[3];
+    /* 9-bit field */
+    uint16_t n_frames[3];
+    /* clipped to 0..59 by the parser */
+    uint8_t  seconds_value[3];
+    /* clipped to 0..59 by the parser */
+    uint8_t  minutes_value[3];
+    /* clipped to 0..23 by the parser */
+    uint8_t  hours_value[3];
+    /* presence flags, only parsed when full_timestamp_flag is 0 */
+    uint8_t  seconds_flag[3];
+    uint8_t  minutes_flag[3];
+    uint8_t  hours_flag[3];
+    /* bit length of time_offset_value (5-bit field); 0 means not coded */
+    uint8_t  time_offset_length[3];
+    int32_t  time_offset_value[3];
+} HEVCSEITimeCode;
+
+/**
+ * SEI state collected while parsing HEVC SEI NAL units.
+ */
+typedef struct HEVCSEI {
+    /* state of the SEI decoder shared between H.264 and H.265; receives
+     * every prefix SEI type without an HEVC-specific parser */
+    H2645SEI common;
+    HEVCSEIPictureHash picture_hash;
+    HEVCSEIPictureTiming picture_timing;
+    /* SPS id taken from the active parameter sets SEI; selects the SPS
+     * consulted when parsing the picture timing SEI */
+    int active_seq_parameter_set_id;
+    HEVCSEITimeCode timecode;
+} HEVCSEI;
+
+struct HEVCParamSets;
+
+/**
+ * Decode all SEI messages of one SEI NAL unit into s.
+ *
+ * @param gb     bit reader positioned (byte aligned) at the first message
+ * @param logctx context passed to av_log()
+ * @param s      SEI state to update
+ * @param ps     parameter sets; the SPS list is consulted for picture timing
+ * @param type   NAL unit type; HEVC_NAL_SEI_PREFIX selects prefix parsing,
+ *               any other value suffix parsing
+ * @return 1 on success, a negative error code on failure
+ */
+int ff_hevc_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEI *s,
+                           const struct HEVCParamSets *ps, enum HEVCNALUnitType type);
+
+/**
+ * Forward the shared (H.264/H.265 common) SEI state of src and dst to
+ * ff_h2645_sei_ctx_replace(). The HEVC-specific members are not touched.
+ *
+ * @return the return value of ff_h2645_sei_ctx_replace()
+ */
+static inline int ff_hevc_sei_ctx_replace(HEVCSEI *dst, const HEVCSEI *src)
+{
+    const H2645SEI *from = &src->common;
+    H2645SEI       *to   = &dst->common;
+
+    return ff_h2645_sei_ctx_replace(to, from);
+}
+
+/**
+ * Reset the SEI values kept in the shared part of the context (for example
+ * caption data extracted during NAL parsing). The HEVC-specific members are
+ * left unchanged.
+ *
+ * @param sei HEVCSEI.
+ */
+static inline void ff_hevc_reset_sei(HEVCSEI *sei)
+{
+    H2645SEI *shared = &sei->common;
+
+    ff_h2645_sei_reset(shared);
+}
+
+#endif /* AVCODEC_HEVC_SEI_H */
author	Anton Khirnov <anton@khirnov.net>	2024-05-29 09:50:48 +0200
committer	Anton Khirnov <anton@khirnov.net>	2024-06-04 11:46:27 +0200
commit	e4601cc3390eec6ccbfc1139bdd102b4e801ae80 (patch)
tree	3f31fad838457e80f3f922bc597ea85ab2c81319 /libavcodec/hevc
parent	ba56a300a94bdf5520ac1324a8e7fbaeea430904 (diff)
download	ffmpeg-e4601cc3390eec6ccbfc1139bdd102b4e801ae80.tar.gz