aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2011-07-04 00:34:44 +0200
committerMichael Niedermayer <michaelni@gmx.at>2011-07-04 00:45:21 +0200
commit976a8b217986fecdbe1fdcaa3e14ce9c3c92eb25 (patch)
treed31a42173318b29419733ec4634c1f6f07cdce6c /libavcodec
parent2a375bb400febf8c1a2dfa87c29fd4185663454c (diff)
parent556f8a066cb33241bf29e85d7e24c9acf7ea9043 (diff)
downloadffmpeg-976a8b217986fecdbe1fdcaa3e14ce9c3c92eb25.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: (40 commits) H.264: template left MB handling H.264: faster fill_decode_caches H.264: faster write_back_* H.264: faster fill_filter_caches H.264: make filter_mb_fast support the case of unavailable top mb Do not include log.h in avutil.h Do not include pixfmt.h in avutil.h Do not include rational.h in avutil.h Do not include mathematics.h in avutil.h Do not include intfloat_readwrite.h in avutil.h Remove return statements following infinite loops without break RTSP: Doxygen comment cleanup doxygen: Escape '\' in Doxygen documentation. md5: cosmetics md5: use AV_WL32 to write result md5: add fate test md5: include correct headers md5: fix test program doxygen: Drop array size declarations from Doxygen parameter names. doxygen: Fix parameter names to match the function prototypes. ... Conflicts: libavcodec/x86/dsputil_mmx.c libavformat/flvenc.c libavformat/oggenc.c libavformat/wtv.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/aacps.c2
-rw-r--r--libavcodec/acelp_pitch_delay.c1
-rw-r--r--libavcodec/ansi.c2
-rw-r--r--libavcodec/avcodec.h3
-rw-r--r--libavcodec/cook.c3
-rw-r--r--libavcodec/ffv1.c74
-rw-r--r--libavcodec/flicvideo.c1
-rw-r--r--libavcodec/h264.c286
-rw-r--r--libavcodec/h264.h488
-rw-r--r--libavcodec/h264_cabac.c33
-rw-r--r--libavcodec/h264_cavlc.c2
-rw-r--r--libavcodec/h264_loopfilter.c56
-rw-r--r--libavcodec/h264_mvpred.h1
-rw-r--r--libavcodec/h264idct_template.c1
-rw-r--r--libavcodec/ituh263dec.c1
-rw-r--r--libavcodec/lpc.c2
-rw-r--r--libavcodec/mjpegdec.c41
-rw-r--r--libavcodec/motion_est_template.c2
-rw-r--r--libavcodec/mpeg12.c2
-rw-r--r--libavcodec/mpegvideo_enc.c1
-rw-r--r--libavcodec/pcm.c1
-rw-r--r--libavcodec/shorten.c3
-rw-r--r--libavcodec/snow.c343
-rw-r--r--libavcodec/svq3.c2
-rw-r--r--libavcodec/utils.c1
-rw-r--r--libavcodec/vp8.c2
-rw-r--r--libavcodec/x86/Makefile1
-rw-r--r--libavcodec/x86/dsputil_mmx.c136
-rw-r--r--libavcodec/x86/h264_qpel_10bit.asm891
-rw-r--r--libavcodec/x86/h264_qpel_mmx.c98
-rw-r--r--libavcodec/xsubdec.c2
31 files changed, 1555 insertions, 927 deletions
diff --git a/libavcodec/aacps.c b/libavcodec/aacps.c
index fc124d1972..f2caedb5ce 100644
--- a/libavcodec/aacps.c
+++ b/libavcodec/aacps.c
@@ -77,7 +77,7 @@ static VLC vlc_ps[10];
* @param avctx contains the current codec context
* @param gb pointer to the input bitstream
* @param ps pointer to the Parametric Stereo context
- * @param par pointer to the parameter to be read
+ * @param PAR pointer to the parameter to be read
* @param e envelope to decode
* @param dt 1: time delta-coded, 0: frequency delta-coded
*/
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index cddf7262b6..66d65a6903 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/mathematics.h"
#include "avcodec.h"
#include "dsputil.h"
#include "acelp_pitch_delay.h"
diff --git a/libavcodec/ansi.c b/libavcodec/ansi.c
index 7043b7c9d9..ebcc288539 100644
--- a/libavcodec/ansi.c
+++ b/libavcodec/ansi.c
@@ -154,7 +154,7 @@ static void draw_char(AVCodecContext *avctx, int c)
/**
* Execute ANSI escape code
- * @param <0 error
+ * @return 0 on success, negative on error
*/
static int execute_code(AVCodecContext * avctx, int c)
{
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 1eb10ab41d..7e9348adef 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,6 +30,9 @@
#include "libavutil/samplefmt.h"
#include "libavutil/avutil.h"
#include "libavutil/cpu.h"
+#include "libavutil/log.h"
+#include "libavutil/pixfmt.h"
+#include "libavutil/rational.h"
#include "libavcodec/version.h"
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index f0dee7905a..6def1ac2f7 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -335,7 +335,7 @@ static av_cold int cook_decode_close(AVCodecContext *avctx)
* Fill the gain array for the timedomain quantization.
*
* @param gb pointer to the GetBitContext
- * @param gaininfo[9] array of gain indexes
+ * @param gaininfo array[9] of gain indexes
*/
static void decode_gain_info(GetBitContext *gb, int *gaininfo)
@@ -1156,7 +1156,6 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
default:
av_log_ask_for_sample(avctx, "Unknown Cook version.\n");
return -1;
- break;
}
if(s > 1 && q->subpacket[s].samples_per_channel != q->samples_per_channel) {
diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index a0a4a1d009..070e0240a2 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -42,25 +42,6 @@
extern const uint8_t ff_log2_run[41];
-static const int8_t quant3[256]={
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
-};
-
static const int8_t quant5_10bit[256]={
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -98,42 +79,7 @@ static const int8_t quant5[256]={
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
-static const int8_t quant7[256]={
- 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
-};
-static const int8_t quant9[256]={
- 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
-};
+
static const int8_t quant9_10bit[256]={
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
@@ -171,24 +117,6 @@ static const int8_t quant11[256]={
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
-static const int8_t quant13[256]={
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
-};
static const uint8_t ver2_state[256]= {
0, 10, 10, 10, 10, 16, 16, 16, 28, 16, 16, 29, 42, 49, 20, 49,
diff --git a/libavcodec/flicvideo.c b/libavcodec/flicvideo.c
index 8cc72e241e..f516d23c63 100644
--- a/libavcodec/flicvideo.c
+++ b/libavcodec/flicvideo.c
@@ -112,7 +112,6 @@ static av_cold int flic_decode_init(AVCodecContext *avctx)
case 24 : avctx->pix_fmt = PIX_FMT_BGR24; /* Supposedly BGR, but havent any files to test with */
av_log(avctx, AV_LOG_ERROR, "24Bpp FLC/FLX is unsupported due to no test files.\n");
return -1;
- break;
default :
av_log(avctx, AV_LOG_ERROR, "Unknown FLC/FLX depth of %d Bpp is unsupported.\n",depth);
return -1;
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 8ecf9b4dbd..9265e0ab8a 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -60,15 +60,6 @@ static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
PIX_FMT_NONE
};
-void ff_h264_write_back_intra_pred_mode(H264Context *h){
- int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
-
- AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
- mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
- mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
- mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
-}
-
/**
* checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
*/
@@ -3057,6 +3048,82 @@ int ff_h264_get_slice_type(const H264Context *h)
}
}
+static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
+ int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
+{
+ int b_stride = h->b_stride;
+ int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
+ int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
+ if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
+ if(USES_LIST(top_type, list)){
+ const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
+ const int b8_xy= 4*top_xy + 2;
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ AV_COPY128(mv_dst - 1*8, s->current_picture.motion_val[list][b_xy + 0]);
+ ref_cache[0 - 1*8]=
+ ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
+ ref_cache[2 - 1*8]=
+ ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+ }else{
+ AV_ZERO128(mv_dst - 1*8);
+ AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ }
+
+ if(!IS_INTERLACED(mb_type^left_type[LTOP])){
+ if(USES_LIST(left_type[LTOP], list)){
+ const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
+ const int b8_xy= 4*left_xy[LTOP] + 1;
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ AV_COPY32(mv_dst - 1 + 0, s->current_picture.motion_val[list][b_xy + b_stride*0]);
+ AV_COPY32(mv_dst - 1 + 8, s->current_picture.motion_val[list][b_xy + b_stride*1]);
+ AV_COPY32(mv_dst - 1 +16, s->current_picture.motion_val[list][b_xy + b_stride*2]);
+ AV_COPY32(mv_dst - 1 +24, s->current_picture.motion_val[list][b_xy + b_stride*3]);
+ ref_cache[-1 + 0]=
+ ref_cache[-1 + 8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
+ ref_cache[-1 + 16]=
+ ref_cache[-1 + 24]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
+ }else{
+ AV_ZERO32(mv_dst - 1 + 0);
+ AV_ZERO32(mv_dst - 1 + 8);
+ AV_ZERO32(mv_dst - 1 +16);
+ AV_ZERO32(mv_dst - 1 +24);
+ ref_cache[-1 + 0]=
+ ref_cache[-1 + 8]=
+ ref_cache[-1 + 16]=
+ ref_cache[-1 + 24]= LIST_NOT_USED;
+ }
+ }
+ }
+
+ if(!USES_LIST(mb_type, list)){
+ fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4);
+ AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ return;
+ }
+
+ {
+ int8_t *ref = &s->current_picture.ref_index[list][4*mb_xy];
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+ uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
+ AV_WN32A(&ref_cache[0*8], ref01);
+ AV_WN32A(&ref_cache[1*8], ref01);
+ AV_WN32A(&ref_cache[2*8], ref23);
+ AV_WN32A(&ref_cache[3*8], ref23);
+ }
+
+ {
+ int16_t (*mv_src)[2] = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
+ AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
+ AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
+ AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
+ AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
+ }
+}
+
/**
*
* @return non zero if the loop filter can be skiped
@@ -3064,208 +3131,124 @@ int ff_h264_get_slice_type(const H264Context *h)
static int fill_filter_caches(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
- int top_xy, left_xy[2];
- int top_type, left_type[2];
+ int top_xy, left_xy[LEFT_MBS];
+ int top_type, left_type[LEFT_MBS];
+ uint8_t *nnz;
+ uint8_t *nnz_cache;
top_xy = mb_xy - (s->mb_stride << MB_FIELD);
- //FIXME deblocking could skip the intra and nnz parts.
-
/* Wow, what a mess, why didn't they simplify the interlacing & intra
* stuff, I can't imagine that these complex rules are worth it. */
- left_xy[1] = left_xy[0] = mb_xy-1;
+ left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
if(FRAME_MBAFF){
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
const int curr_mb_field_flag = IS_INTERLACED(mb_type);
if(s->mb_y&1){
if (left_mb_field_flag != curr_mb_field_flag) {
- left_xy[0] -= s->mb_stride;
+ left_xy[LTOP] -= s->mb_stride;
}
}else{
if(curr_mb_field_flag){
top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
}
if (left_mb_field_flag != curr_mb_field_flag) {
- left_xy[1] += s->mb_stride;
+ left_xy[LBOT] += s->mb_stride;
}
}
}
h->top_mb_xy = top_xy;
- h->left_mb_xy[0] = left_xy[0];
- h->left_mb_xy[1] = left_xy[1];
+ h->left_mb_xy[LTOP] = left_xy[LTOP];
+ h->left_mb_xy[LBOT] = left_xy[LBOT];
{
//for sufficiently low qp, filtering wouldn't do anything
//this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
int qp = s->current_picture.qscale_table[mb_xy];
if(qp <= qp_thresh
- && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
- && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
+ && (left_xy[LTOP]<0 || ((qp + s->current_picture.qscale_table[left_xy[LTOP]] + 1)>>1) <= qp_thresh)
+ && (top_xy <0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
if(!FRAME_MBAFF)
return 1;
- if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
- && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
+ if( (left_xy[LTOP]< 0 || ((qp + s->current_picture.qscale_table[left_xy[LBOT] ] + 1)>>1) <= qp_thresh)
+ && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
return 1;
}
}
- top_type = s->current_picture.mb_type[top_xy] ;
- left_type[0] = s->current_picture.mb_type[left_xy[0]];
- left_type[1] = s->current_picture.mb_type[left_xy[1]];
+ top_type = s->current_picture.mb_type[top_xy];
+ left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]];
+ left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]];
if(h->deblocking_filter == 2){
- if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
- if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
+ if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
+ if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
}else{
- if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
- if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
+ if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
+ if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
}
- h->top_type = top_type ;
- h->left_type[0]= left_type[0];
- h->left_type[1]= left_type[1];
+ h->top_type = top_type;
+ h->left_type[LTOP]= left_type[LTOP];
+ h->left_type[LBOT]= left_type[LBOT];
if(IS_INTRA(mb_type))
return 0;
- AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
- AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
- AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
- AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
+ fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
+ if(h->list_count == 2)
+ fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);
+ nnz = h->non_zero_count[mb_xy];
+ nnz_cache = h->non_zero_count_cache;
+ AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
+ AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
+ AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
+ AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
h->cbp= h->cbp_table[mb_xy];
- {
- int list;
- for(list=0; list<h->list_count; list++){
- int8_t *ref;
- int y, b_stride;
- int16_t (*mv_dst)[2];
- int16_t (*mv_src)[2];
-
- if(!USES_LIST(mb_type, list)){
- fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
- AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
- AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
- AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
- AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
- continue;
- }
-
- ref = &s->current_picture.ref_index[list][4*mb_xy];
- {
- int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
- AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
- AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
- ref += 2;
- AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
- AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
- }
-
- b_stride = h->b_stride;
- mv_dst = &h->mv_cache[list][scan8[0]];
- mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
- for(y=0; y<4; y++){
- AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
- }
-
- }
- }
-
-
-/*
-0 . T T. T T T T
-1 L . .L . . . .
-2 L . .L . . . .
-3 . T TL . . . .
-4 L . .L . . . .
-5 L . .. . . . .
-*/
-//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
if(top_type){
- AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
+ nnz = h->non_zero_count[top_xy];
+ AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
}
- if(left_type[0]){
- h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
- h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
- h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
- h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
+ if(left_type[LTOP]){
+ nnz = h->non_zero_count[left_xy[LTOP]];
+ nnz_cache[3+8*1]= nnz[3+0*4];
+ nnz_cache[3+8*2]= nnz[3+1*4];
+ nnz_cache[3+8*3]= nnz[3+2*4];
+ nnz_cache[3+8*4]= nnz[3+3*4];
}
// CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
if(!CABAC && h->pps.transform_8x8_mode){
if(IS_8x8DCT(top_type)){
- h->non_zero_count_cache[4+8*0]=
- h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
- h->non_zero_count_cache[6+8*0]=
- h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
+ nnz_cache[4+8*0]=
+ nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
+ nnz_cache[6+8*0]=
+ nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
}
- if(IS_8x8DCT(left_type[0])){
- h->non_zero_count_cache[3+8*1]=
- h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
+ if(IS_8x8DCT(left_type[LTOP])){
+ nnz_cache[3+8*1]=
+ nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
}
- if(IS_8x8DCT(left_type[1])){
- h->non_zero_count_cache[3+8*3]=
- h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
+ if(IS_8x8DCT(left_type[LBOT])){
+ nnz_cache[3+8*3]=
+ nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
}
if(IS_8x8DCT(mb_type)){
- h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
- h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;
-
- h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
- h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
+ nnz_cache[scan8[0 ]]= nnz_cache[scan8[1 ]]=
+ nnz_cache[scan8[2 ]]= nnz_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;
- h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
- h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
+ nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
+ nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
- h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
- h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
- }
- }
+ nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
+ nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
- if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
- int list;
- for(list=0; list<h->list_count; list++){
- if(USES_LIST(top_type, list)){
- const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
- const int b8_xy= 4*top_xy + 2;
- int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
- AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
- h->ref_cache[list][scan8[0] + 0 - 1*8]=
- h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
- h->ref_cache[list][scan8[0] + 2 - 1*8]=
- h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
- }else{
- AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
- AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
- }
-
- if(!IS_INTERLACED(mb_type^left_type[0])){
- if(USES_LIST(left_type[0], list)){
- const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
- const int b8_xy= 4*left_xy[0] + 1;
- int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
- AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
- AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
- AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
- AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
- h->ref_cache[list][scan8[0] - 1 + 0 ]=
- h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
- h->ref_cache[list][scan8[0] - 1 +16 ]=
- h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
- }else{
- AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
- AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
- AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
- AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
- h->ref_cache[list][scan8[0] - 1 + 0 ]=
- h->ref_cache[list][scan8[0] - 1 + 8 ]=
- h->ref_cache[list][scan8[0] - 1 + 16 ]=
- h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
- }
- }
+ nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
+ nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
}
}
@@ -3556,7 +3539,6 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
ff_draw_horiz_band(s, 16*s->mb_y, 16);
}
#endif
- return -1; //not reached
}
/**
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 4188ad922c..461c490276 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -70,6 +70,10 @@
#define MB_FIELD h->mb_field_decoding_flag
#define FRAME_MBAFF h->mb_aff_frame
#define FIELD_PICTURE (s->picture_structure != PICT_FRAME)
+#define LEFT_MBS 2
+#define LTOP 0
+#define LBOT 1
+#define LEFT(i) (i)
#else
#define MB_MBAFF 0
#define MB_FIELD 0
@@ -77,6 +81,10 @@
#define FIELD_PICTURE 0
#undef IS_INTERLACED
#define IS_INTERLACED(mb_type) 0
+#define LEFT_MBS 1
+#define LTOP 0
+#define LBOT 0
+#define LEFT(i) 0
#endif
#define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
@@ -272,12 +280,12 @@ typedef struct H264Context{
int topleft_mb_xy;
int top_mb_xy;
int topright_mb_xy;
- int left_mb_xy[2];
+ int left_mb_xy[LEFT_MBS];
int topleft_type;
int top_type;
int topright_type;
- int left_type[2];
+ int left_type[LEFT_MBS];
const uint8_t * left_block;
int topleft_partition;
@@ -308,11 +316,6 @@ typedef struct H264Context{
#define PART_NOT_AVAILABLE -2
/**
- * is 1 if the specific list MV&references are set to 0,0,-2.
- */
- int mv_cache_clean[2];
-
- /**
* number of neighbors (top and/or left) that used 8x8 dct
*/
int neighbor_transform_size;
@@ -658,7 +661,6 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h);
*/
int ff_h264_check_intra_pred_mode(H264Context *h, int mode);
-void ff_h264_write_back_intra_pred_mode(H264Context *h);
void ff_h264_hl_decode_mb(H264Context *h);
int ff_h264_frame_start(H264Context *h);
int ff_h264_decode_extradata(H264Context *h);
@@ -773,7 +775,7 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
static void fill_decode_neighbors(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
- int topleft_xy, top_xy, topright_xy, left_xy[2];
+ int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
static const uint8_t left_block_options[4][32]={
{0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
{2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
@@ -790,16 +792,16 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
topleft_xy = top_xy - 1;
topright_xy= top_xy + 1;
- left_xy[1] = left_xy[0] = mb_xy-1;
+ left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
h->left_block = left_block_options[0];
if(FRAME_MBAFF){
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
const int curr_mb_field_flag = IS_INTERLACED(mb_type);
if(s->mb_y&1){
if (left_mb_field_flag != curr_mb_field_flag) {
- left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
+ left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1;
if (curr_mb_field_flag) {
- left_xy[1] += s->mb_stride;
+ left_xy[LBOT] += s->mb_stride;
h->left_block = left_block_options[3];
} else {
topleft_xy += s->mb_stride;
@@ -816,7 +818,7 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
}
if (left_mb_field_flag != curr_mb_field_flag) {
if (curr_mb_field_flag) {
- left_xy[1] += s->mb_stride;
+ left_xy[LBOT] += s->mb_stride;
h->left_block = left_block_options[3];
} else {
h->left_block = left_block_options[2];
@@ -828,25 +830,25 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
h->topleft_mb_xy = topleft_xy;
h->top_mb_xy = top_xy;
h->topright_mb_xy= topright_xy;
- h->left_mb_xy[0] = left_xy[0];
- h->left_mb_xy[1] = left_xy[1];
+ h->left_mb_xy[LTOP] = left_xy[LTOP];
+ h->left_mb_xy[LBOT] = left_xy[LBOT];
//FIXME do we need all in the context?
h->topleft_type = s->current_picture.mb_type[topleft_xy] ;
h->top_type = s->current_picture.mb_type[top_xy] ;
h->topright_type= s->current_picture.mb_type[topright_xy];
- h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ;
- h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ;
+ h->left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]] ;
+ h->left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]] ;
if(FMO){
- if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
- if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
- if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
+ if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
+ if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
+ if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
}else{
if(h->slice_table[topleft_xy ] != h->slice_num){
h->topleft_type = 0;
- if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
- if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
+ if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
+ if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
}
}
if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
@@ -854,21 +856,23 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
static void fill_decode_caches(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
- int topleft_xy, top_xy, topright_xy, left_xy[2];
- int topleft_type, top_type, topright_type, left_type[2];
+ int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
+ int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
const uint8_t * left_block= h->left_block;
int i;
-
- topleft_xy = h->topleft_mb_xy ;
- top_xy = h->top_mb_xy ;
- topright_xy = h->topright_mb_xy;
- left_xy[0] = h->left_mb_xy[0] ;
- left_xy[1] = h->left_mb_xy[1] ;
- topleft_type = h->topleft_type ;
- top_type = h->top_type ;
- topright_type= h->topright_type ;
- left_type[0] = h->left_type[0] ;
- left_type[1] = h->left_type[1] ;
+ uint8_t *nnz;
+ uint8_t *nnz_cache;
+
+ topleft_xy = h->topleft_mb_xy;
+ top_xy = h->top_mb_xy;
+ topright_xy = h->topright_mb_xy;
+ left_xy[LTOP] = h->left_mb_xy[LTOP];
+ left_xy[LBOT] = h->left_mb_xy[LBOT];
+ topleft_type = h->topleft_type;
+ top_type = h->top_type;
+ topright_type = h->topright_type;
+ left_type[LTOP]= h->left_type[LTOP];
+ left_type[LBOT]= h->left_type[LBOT];
if(!IS_SKIP(mb_type)){
if(IS_INTRA(mb_type)){
@@ -883,27 +887,27 @@ static void fill_decode_caches(H264Context *h, int mb_type){
h->top_samples_available= 0x33FF;
h->topright_samples_available= 0x26EA;
}
- if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
+ if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){
if(IS_INTERLACED(mb_type)){
- if(!(left_type[0] & type_mask)){
+ if(!(left_type[LTOP] & type_mask)){
h->topleft_samples_available&= 0xDFFF;
h->left_samples_available&= 0x5FFF;
}
- if(!(left_type[1] & type_mask)){
+ if(!(left_type[LBOT] & type_mask)){
h->topleft_samples_available&= 0xFF5F;
h->left_samples_available&= 0xFF5F;
}
}else{
- int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride];
+ int left_typei = s->current_picture.mb_type[left_xy[LTOP] + s->mb_stride];
- assert(left_xy[0] == left_xy[1]);
- if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
+ assert(left_xy[LTOP] == left_xy[LBOT]);
+ if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){
h->topleft_samples_available&= 0xDF5F;
h->left_samples_available&= 0x5F5F;
}
}
}else{
- if(!(left_type[0] & type_mask)){
+ if(!(left_type[LTOP] & type_mask)){
h->topleft_samples_available&= 0xDF5F;
h->left_samples_available&= 0x5F5F;
}
@@ -925,13 +929,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){
h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
}
for(i=0; i<2; i++){
- if(IS_INTRA4x4(left_type[i])){
- int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
+ if(IS_INTRA4x4(left_type[LEFT(i)])){
+ int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
}else{
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
- h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask);
+ h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask);
}
}
}
@@ -947,42 +951,45 @@ static void fill_decode_caches(H264Context *h, int mb_type){
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
+ nnz_cache = h->non_zero_count_cache;
if(top_type){
- AV_COPY32(&h->non_zero_count_cache[4+8* 0], &h->non_zero_count[top_xy][4*3]);
+ nnz = h->non_zero_count[top_xy];
+ AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]);
if(CHROMA444){
- AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 7]);
- AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4*11]);
+ AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]);
+ AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]);
}else{
- AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 5]);
- AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4* 9]);
+ AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]);
+ AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]);
}
}else{
uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
- AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
- AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
- AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
+ AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+ AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+ AV_WN32A(&nnz_cache[4+8*10], top_empty);
}
for (i=0; i<2; i++) {
- if(left_type[i]){
- h->non_zero_count_cache[3+8* 1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
- h->non_zero_count_cache[3+8* 2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
+ if(left_type[LEFT(i)]){
+ nnz = h->non_zero_count[left_xy[LEFT(i)]];
+ nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]];
+ nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]];
if(CHROMA444){
- h->non_zero_count_cache[3+8* 6 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+4*4];
- h->non_zero_count_cache[3+8* 7 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+4*4];
- h->non_zero_count_cache[3+8*11 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+8*4];
- h->non_zero_count_cache[3+8*12 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+8*4];
+ nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4];
+ nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4];
+ nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4];
+ nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4];
}else{
- h->non_zero_count_cache[3+8* 6 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
- h->non_zero_count_cache[3+8*11 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
+ nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]];
+ nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]];
}
}else{
- h->non_zero_count_cache[3+8* 1 + 2*8*i]=
- h->non_zero_count_cache[3+8* 2 + 2*8*i]=
- h->non_zero_count_cache[3+8* 6 + 2*8*i]=
- h->non_zero_count_cache[3+8* 7 + 2*8*i]=
- h->non_zero_count_cache[3+8*11 + 2*8*i]=
- h->non_zero_count_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
+ nnz_cache[3+8* 1 + 2*8*i]=
+ nnz_cache[3+8* 2 + 2*8*i]=
+ nnz_cache[3+8* 6 + 2*8*i]=
+ nnz_cache[3+8* 7 + 2*8*i]=
+ nnz_cache[3+8*11 + 2*8*i]=
+ nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
}
}
@@ -994,10 +1001,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
}
// left_cbp
- if (left_type[0]) {
- h->left_cbp = (h->cbp_table[left_xy[0]] & 0x7F0)
- | ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
- | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
+ if (left_type[LTOP]) {
+ h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0)
+ | ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2)
+ | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2);
} else {
h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
}
@@ -1006,144 +1013,145 @@ static void fill_decode_caches(H264Context *h, int mb_type){
if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
int list;
+ int b_stride = h->b_stride;
for(list=0; list<h->list_count; list++){
+ int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
+ int8_t *ref = s->current_picture.ref_index[list];
+ int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
+ int16_t (*mv)[2] = s->current_picture.motion_val[list];
if(!USES_LIST(mb_type, list)){
- /*if(!h->mv_cache_clean[list]){
- memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
- memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
- h->mv_cache_clean[list]= 1;
- }*/
continue;
}
assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
- h->mv_cache_clean[list]= 0;
-
if(USES_LIST(top_type, list)){
- const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
- AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
- h->ref_cache[list][scan8[0] + 0 - 1*8]=
- h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2];
- h->ref_cache[list][scan8[0] + 2 - 1*8]=
- h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3];
+ const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
+ AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]);
+ ref_cache[0 - 1*8]=
+ ref_cache[1 - 1*8]= ref[4*top_xy + 2];
+ ref_cache[2 - 1*8]=
+ ref_cache[3 - 1*8]= ref[4*top_xy + 3];
}else{
- AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
- AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
+ AV_ZERO128(mv_cache[0 - 1*8]);
+ AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
}
if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
for(i=0; i<2; i++){
- int cache_idx = scan8[0] - 1 + i*2*8;
- if(USES_LIST(left_type[i], list)){
- const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
- const int b8_xy= 4*left_xy[i] + 1;
- AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]);
- AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]);
- h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)];
- h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)];
+ int cache_idx = -1 + i*2*8;
+ if(USES_LIST(left_type[LEFT(i)], list)){
+ const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3;
+ const int b8_xy= 4*left_xy[LEFT(i)] + 1;
+ AV_COPY32(mv_cache[cache_idx ], mv[b_xy + b_stride*left_block[0+i*2]]);
+ AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]);
+ ref_cache[cache_idx ]= ref[b8_xy + (left_block[0+i*2]&~1)];
+ ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)];
}else{
- AV_ZERO32(h->mv_cache [list][cache_idx ]);
- AV_ZERO32(h->mv_cache [list][cache_idx+8]);
- h->ref_cache[list][cache_idx ]=
- h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ AV_ZERO32(mv_cache[cache_idx ]);
+ AV_ZERO32(mv_cache[cache_idx+8]);
+ ref_cache[cache_idx ]=
+ ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
}
}else{
- if(USES_LIST(left_type[0], list)){
- const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
- const int b8_xy= 4*left_xy[0] + 1;
- AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]);
- h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)];
+ if(USES_LIST(left_type[LTOP], list)){
+ const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
+ const int b8_xy= 4*left_xy[LTOP] + 1;
+ AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]);
+ ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)];
}else{
- AV_ZERO32(h->mv_cache [list][scan8[0] - 1]);
- h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ AV_ZERO32(mv_cache[-1]);
+ ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
}
if(USES_LIST(topright_type, list)){
- const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
- AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]);
- h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2];
+ const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride;
+ AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]);
+ ref_cache[4 - 1*8]= ref[4*topright_xy + 2];
}else{
- AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]);
- h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ AV_ZERO32(mv_cache[4 - 1*8]);
+ ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
- if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
+ if(ref_cache[4 - 1*8] < 0){
if(USES_LIST(topleft_type, list)){
- const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
+ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride);
const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2);
- AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]);
- h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+ AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]);
+ ref_cache[-1 - 1*8]= ref[b8_xy];
}else{
- AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]);
- h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ AV_ZERO32(mv_cache[-1 - 1*8]);
+ ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
}
if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
continue;
- if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
- h->ref_cache[list][scan8[4 ]] =
- h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
- AV_ZERO32(h->mv_cache [list][scan8[4 ]]);
- AV_ZERO32(h->mv_cache [list][scan8[12]]);
-
- if( CABAC ) {
- /* XXX beurk, Load mvd */
- if(USES_LIST(top_type, list)){
- const int b_xy= h->mb2br_xy[top_xy];
- AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
- }else{
- AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
- }
- if(USES_LIST(left_type[0], list)){
- const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]);
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]);
- }else{
- AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
- AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
- }
- if(USES_LIST(left_type[1], list)){
- const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]);
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]);
- }else{
- AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
- AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
- }
- AV_ZERO16(h->mvd_cache [list][scan8[4 ]]);
- AV_ZERO16(h->mvd_cache [list][scan8[12]]);
- if(h->slice_type_nos == AV_PICTURE_TYPE_B){
- fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
-
- if(IS_DIRECT(top_type)){
- AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
- }else if(IS_8X8(top_type)){
- int b8_xy = 4*top_xy;
- h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2];
- h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3];
+ if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){
+ uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]];
+ uint8_t (*mvd)[2] = h->mvd_table[list];
+ ref_cache[2+8*0] =
+ ref_cache[2+8*2] = PART_NOT_AVAILABLE;
+ AV_ZERO32(mv_cache[2+8*0]);
+ AV_ZERO32(mv_cache[2+8*2]);
+
+ if( CABAC ) {
+ if(USES_LIST(top_type, list)){
+ const int b_xy= h->mb2br_xy[top_xy];
+ AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]);
}else{
- AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
+ AV_ZERO64(mvd_cache[0 - 1*8]);
+ }
+ if(USES_LIST(left_type[LTOP], list)){
+ const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6;
+ AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]);
+ AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]);
+ }else{
+ AV_ZERO16(mvd_cache[-1 + 0*8]);
+ AV_ZERO16(mvd_cache[-1 + 1*8]);
+ }
+ if(USES_LIST(left_type[LBOT], list)){
+ const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6;
+ AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]);
+ AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]);
+ }else{
+ AV_ZERO16(mvd_cache[-1 + 2*8]);
+ AV_ZERO16(mvd_cache[-1 + 3*8]);
+ }
+ AV_ZERO16(mvd_cache[2+8*0]);
+ AV_ZERO16(mvd_cache[2+8*2]);
+ if(h->slice_type_nos == AV_PICTURE_TYPE_B){
+ uint8_t *direct_cache = &h->direct_cache[scan8[0]];
+ uint8_t *direct_table = h->direct_table;
+ fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1);
+
+ if(IS_DIRECT(top_type)){
+ AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
+ }else if(IS_8X8(top_type)){
+ int b8_xy = 4*top_xy;
+ direct_cache[0 - 1*8]= direct_table[b8_xy + 2];
+ direct_cache[2 - 1*8]= direct_table[b8_xy + 3];
+ }else{
+ AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1));
+ }
+
+ if(IS_DIRECT(left_type[LTOP]))
+ direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1;
+ else if(IS_8X8(left_type[LTOP]))
+ direct_cache[-1 + 0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)];
+ else
+ direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1;
+
+ if(IS_DIRECT(left_type[LBOT]))
+ direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1;
+ else if(IS_8X8(left_type[LBOT]))
+ direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)];
+ else
+ direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1;
}
-
- if(IS_DIRECT(left_type[0]))
- h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
- else if(IS_8X8(left_type[0]))
- h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
- else
- h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;
-
- if(IS_DIRECT(left_type[1]))
- h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
- else if(IS_8X8(left_type[1]))
- h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
- else
- h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
}
}
- }
if(FRAME_MBAFF){
#define MAP_MVS\
MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
@@ -1152,10 +1160,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
- MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
- MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
- MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
- MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
+ MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\
+ MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\
+ MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\
+ MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT])
if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\
if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
@@ -1179,13 +1187,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){
}
}
- h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
+ h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
}
/**
* gets the predicted intra4x4 prediction mode.
*/
-static inline int pred_intra_mode(H264Context *h, int n){
+static av_always_inline int pred_intra_mode(H264Context *h, int n){
const int index8= scan8[n];
const int left= h->intra4x4_pred_mode_cache[index8 - 1];
const int top = h->intra4x4_pred_mode_cache[index8 - 8];
@@ -1197,69 +1205,83 @@ static inline int pred_intra_mode(H264Context *h, int n){
else return min;
}
-static inline void write_back_non_zero_count(H264Context *h){
- const int mb_xy= h->mb_xy;
+static av_always_inline void write_back_intra_pred_mode(H264Context *h){
+ int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
+ int8_t *i4x4_cache= h->intra4x4_pred_mode_cache;
+
+ AV_COPY32(i4x4, i4x4_cache + 4 + 8*4);
+ i4x4[4]= i4x4_cache[7+8*3];
+ i4x4[5]= i4x4_cache[7+8*2];
+ i4x4[6]= i4x4_cache[7+8*1];
+}
- AV_COPY32(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[4+8* 1]);
- AV_COPY32(&h->non_zero_count[mb_xy][ 4], &h->non_zero_count_cache[4+8* 2]);
- AV_COPY32(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[4+8* 3]);
- AV_COPY32(&h->non_zero_count[mb_xy][12], &h->non_zero_count_cache[4+8* 4]);
- AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[4+8* 6]);
- AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8* 7]);
- AV_COPY32(&h->non_zero_count[mb_xy][32], &h->non_zero_count_cache[4+8*11]);
- AV_COPY32(&h->non_zero_count[mb_xy][36], &h->non_zero_count_cache[4+8*12]);
+static av_always_inline void write_back_non_zero_count(H264Context *h){
+ const int mb_xy= h->mb_xy;
+ uint8_t *nnz = h->non_zero_count[mb_xy];
+ uint8_t *nnz_cache = h->non_zero_count_cache;
+
+ AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]);
+ AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]);
+ AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]);
+ AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]);
+ AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]);
+ AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]);
+ AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
+ AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
if(CHROMA444){
- AV_COPY32(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[4+8* 8]);
- AV_COPY32(&h->non_zero_count[mb_xy][28], &h->non_zero_count_cache[4+8* 9]);
- AV_COPY32(&h->non_zero_count[mb_xy][40], &h->non_zero_count_cache[4+8*13]);
- AV_COPY32(&h->non_zero_count[mb_xy][44], &h->non_zero_count_cache[4+8*14]);
+ AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
+ AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
+ AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);
+ AV_COPY32(&nnz[44], &nnz_cache[4+8*14]);
}
}
-static inline void write_back_motion(H264Context *h, int mb_type){
+static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride,
+ int b_xy, int b8_xy, int mb_type, int list )
+{
+ int16_t (*mv_dst)[2] = &s->current_picture.motion_val[list][b_xy];
+ int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]];
+ AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0);
+ AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1);
+ AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2);
+ AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3);
+ if( CABAC ) {
+ uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
+ uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
+ if(IS_SKIP(mb_type))
+ AV_ZERO128(mvd_dst);
+ else{
+ AV_COPY64(mvd_dst, mvd_src + 8*3);
+ AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
+ AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
+ AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
+ }
+ }
+
+ {
+ int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
+ int8_t *ref_cache = h->ref_cache[list];
+ ref_index[0+0*2]= ref_cache[scan8[0]];
+ ref_index[1+0*2]= ref_cache[scan8[4]];
+ ref_index[0+1*2]= ref_cache[scan8[8]];
+ ref_index[1+1*2]= ref_cache[scan8[12]];
+ }
+}
+
+static av_always_inline void write_back_motion(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
+ const int b_stride = h->b_stride;
const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
const int b8_xy= 4*h->mb_xy;
- int list;
- if(!USES_LIST(mb_type, 0))
+ if(USES_LIST(mb_type, 0)){
+ write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0);
+ }else{
fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
-
- for(list=0; list<h->list_count; list++){
- int y, b_stride;
- int16_t (*mv_dst)[2];
- int16_t (*mv_src)[2];
-
- if(!USES_LIST(mb_type, list))
- continue;
-
- b_stride = h->b_stride;
- mv_dst = &s->current_picture.motion_val[list][b_xy];
- mv_src = &h->mv_cache[list][scan8[0]];
- for(y=0; y<4; y++){
- AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
- }
- if( CABAC ) {
- uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
- uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
- if(IS_SKIP(mb_type))
- AV_ZERO128(mvd_dst);
- else{
- AV_COPY64(mvd_dst, mvd_src + 8*3);
- AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
- AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
- AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
- }
- }
-
- {
- int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
- ref_index[0+0*2]= h->ref_cache[list][scan8[0]];
- ref_index[1+0*2]= h->ref_cache[list][scan8[4]];
- ref_index[0+1*2]= h->ref_cache[list][scan8[8]];
- ref_index[1+1*2]= h->ref_cache[list][scan8[12]];
- }
+ }
+ if(USES_LIST(mb_type, 1)){
+ write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1);
}
if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){
@@ -1272,7 +1294,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
}
}
-static inline int get_dct8x8_allowed(H264Context *h){
+static av_always_inline int get_dct8x8_allowed(H264Context *h){
if(h->sps.direct_8x8_inference_flag)
return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
else
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 3975a61699..325e15bc0a 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1296,9 +1296,9 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
if(intra_slice){
int ctx=0;
- if( h->left_type[0] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
+ if( h->left_type[LTOP] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
ctx++;
- if( h->top_type & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
+ if( h->top_type & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
ctx++;
if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
return 0; /* I4x4 */
@@ -1376,10 +1376,10 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
int ctx = 0;
/* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
- if( h->left_type[0] && h->chroma_pred_mode_table[mba_xy] != 0 )
+ if( h->left_type[LTOP] && h->chroma_pred_mode_table[mba_xy] != 0 )
ctx++;
- if( h->top_type && h->chroma_pred_mode_table[mbb_xy] != 0 )
+ if( h->top_type && h->chroma_pred_mode_table[mbb_xy] != 0 )
ctx++;
if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
@@ -1881,7 +1881,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
int ctx = 0;
assert(h->slice_type_nos == AV_PICTURE_TYPE_B);
- if( !IS_DIRECT( h->left_type[0]-1 ) )
+ if( !IS_DIRECT( h->left_type[LTOP]-1 ) )
ctx++;
if( !IS_DIRECT( h->top_type-1 ) )
ctx++;
@@ -2000,7 +2000,7 @@ decode_intra_mb:
//av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
}
}
- ff_h264_write_back_intra_pred_mode(h);
+ write_back_intra_pred_mode(h);
if( ff_h264_check_intra4x4_pred_mode(h) < 0 ) return -1;
} else {
h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
@@ -2249,21 +2249,22 @@ decode_intra_mb:
* the transform mode of the current macroblock there. */
if (CHROMA444 && IS_8x8DCT(mb_type)){
int i;
+ uint8_t *nnz_cache = h->non_zero_count_cache;
for (i = 0; i < 2; i++){
- if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
- h->non_zero_count_cache[3+8* 1 + 2*8*i]=
- h->non_zero_count_cache[3+8* 2 + 2*8*i]=
- h->non_zero_count_cache[3+8* 6 + 2*8*i]=
- h->non_zero_count_cache[3+8* 7 + 2*8*i]=
- h->non_zero_count_cache[3+8*11 + 2*8*i]=
- h->non_zero_count_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+ if (h->left_type[LEFT(i)] && !IS_8x8DCT(h->left_type[LEFT(i)])){
+ nnz_cache[3+8* 1 + 2*8*i]=
+ nnz_cache[3+8* 2 + 2*8*i]=
+ nnz_cache[3+8* 6 + 2*8*i]=
+ nnz_cache[3+8* 7 + 2*8*i]=
+ nnz_cache[3+8*11 + 2*8*i]=
+ nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
}
}
if (h->top_type && !IS_8x8DCT(h->top_type)){
uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
- AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
- AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
- AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
+ AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+ AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+ AV_WN32A(&nnz_cache[4+8*10], top_empty);
}
}
s->current_picture.mb_type[mb_xy]= mb_type;
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 0ddc430661..27ab9868f6 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -731,7 +731,7 @@ decode_intra_mb:
else
h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
}
- ff_h264_write_back_intra_pred_mode(h);
+ write_back_intra_pred_mode(h);
if( ff_h264_check_intra4x4_pred_mode(h) < 0)
return -1;
}else{
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 226c2aef28..c51ada287a 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -215,19 +215,20 @@ static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t
void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
MpegEncContext * const s = &h->s;
int mb_xy;
- int mb_type, left_type;
+ int mb_type, left_type, top_type;
int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
int chroma444 = CHROMA444;
mb_xy = h->mb_xy;
- if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
+ if(!h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
return;
}
assert(!FRAME_MBAFF);
- left_type= h->left_type[0];
+ left_type= h->left_type[LTOP];
+ top_type= h->top_type;
mb_type = s->current_picture.mb_type[mb_xy];
qp = s->current_picture.qscale_table[mb_xy];
@@ -253,13 +254,17 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
if( IS_8x8DCT(mb_type) ) {
filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
- filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+ if(top_type){
+ filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+ }
filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
} else {
filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h);
- filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+ if(top_type){
+ filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+ }
filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h);
filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
@@ -273,8 +278,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
if( IS_8x8DCT(mb_type) ) {
filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h);
filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
- filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
- filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+ if(top_type){
+ filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
+ filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+ }
filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h);
} else {
@@ -284,8 +291,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h);
filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h);
- filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
- filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+ if(top_type){
+ filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
+ filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+ }
filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h);
filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h);
filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
@@ -300,9 +309,11 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
}
filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
- filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+ if(top_type){
+ filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+ filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+ }
filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
- filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
}
}
@@ -318,7 +329,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
AV_WN64A(bS[1][2], 0x0002000200020002ULL);
} else {
int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
- int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
+ int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
@@ -326,7 +337,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
}
if( IS_INTRA(left_type) )
AV_WN64A(bS[0][0], 0x0004000400040004ULL);
- if( IS_INTRA(h->top_type) )
+ if( IS_INTRA(top_type) )
AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL);
#define FILTER(hv,dir,edge)\
@@ -345,16 +356,19 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
if(left_type)
FILTER(v,0,0);
if( edges == 1 ) {
- FILTER(h,1,0);
+ if(top_type)
+ FILTER(h,1,0);
} else if( IS_8x8DCT(mb_type) ) {
FILTER(v,0,2);
- FILTER(h,1,0);
+ if(top_type)
+ FILTER(h,1,0);
FILTER(h,1,2);
} else {
FILTER(v,0,1);
FILTER(v,0,2);
FILTER(v,0,3);
- FILTER(h,1,0);
+ if(top_type)
+ FILTER(h,1,0);
FILTER(h,1,1);
FILTER(h,1,2);
FILTER(h,1,3);
@@ -397,7 +411,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
int edge;
int chroma_qp_avg[2];
const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
- const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
+ const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
// how often to recheck mv-based bS when iterating between edges
static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
@@ -633,9 +647,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
if (FRAME_MBAFF
// and current and left pair do not have the same interlaced type
- && IS_INTERLACED(mb_type^h->left_type[0])
+ && IS_INTERLACED(mb_type^h->left_type[LTOP])
// and left mb is in available to us
- && h->left_type[0]) {
+ && h->left_type[LTOP]) {
/* First vertical edge is different in MBAFF frames
* There are 8 different bS to compute and 2 different Qp
*/
@@ -663,8 +677,8 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
const uint8_t *off= offset[MB_FIELD][mb_y&1];
for( i = 0; i < 8; i++ ) {
int j= MB_FIELD ? i>>2 : i&1;
- int mbn_xy = h->left_mb_xy[j];
- int mbn_type= h->left_type[j];
+ int mbn_xy = h->left_mb_xy[LEFT(j)];
+ int mbn_type= h->left_type[LEFT(j)];
if( IS_INTRA( mbn_type ) )
bS[i] = 4;
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index 661ef6c381..a0886d5d47 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -64,7 +64,6 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
if(!MB_FIELD
&& IS_INTERLACED(h->left_type[0])){
SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5));
- assert(h->left_mb_xy[0] == h->left_mb_xy[1]);
}
if(MB_FIELD
&& !IS_INTERLACED(h->left_type[0])){
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index 83f6f38691..2acdf47a2f 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -237,7 +237,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
}
/**
* IDCT transforms the 16 dc values and dequantizes them.
- * @param qp quantization parameter
*/
void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, int qmul){
#define stride 16
diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c
index b1e67231fd..c0d90eab28 100644
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -30,6 +30,7 @@
//#define DEBUG
#include <limits.h>
+#include "libavutil/mathematics.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index d041cafe85..9488a0f060 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -149,7 +149,7 @@ static int estimate_best_order(double *ref, int min_order, int max_order)
/**
* Calculate LPC coefficients for multiple orders
*
- * @param use_lpc LPC method for determining coefficients
+ * @param lpc_type LPC method for determining coefficients
* 0 = LPC with fixed pre-defined coeffs
* 1 = LPC with coeffs determined by Levinson-Durbin recursion
* 2+ = LPC with coeffs determined by Cholesky factorization using (use_lpc-1) passes.
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 02c66f504b..b875f5b8d4 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -1505,29 +1505,26 @@ eoi_parser:
av_log(avctx, AV_LOG_WARNING, "Found EOI before any SOF, ignoring\n");
break;
}
- {
- if (s->interlaced) {
- s->bottom_field ^= 1;
- /* if not bottom field, do not output image yet */
- if (s->bottom_field == !s->interlace_polarity)
- goto not_the_end;
- }
- *picture = *s->picture_ptr;
- *data_size = sizeof(AVFrame);
-
- if(!s->lossless){
- picture->quality= FFMAX3(s->qscale[0], s->qscale[1], s->qscale[2]);
- picture->qstride= 0;
- picture->qscale_table= s->qscale_table;
- memset(picture->qscale_table, picture->quality, (s->width+15)/16);
- if(avctx->debug & FF_DEBUG_QP)
- av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
- picture->quality*= FF_QP2LAMBDA;
- }
-
- goto the_end;
+ if (s->interlaced) {
+ s->bottom_field ^= 1;
+ /* if not bottom field, do not output image yet */
+ if (s->bottom_field == !s->interlace_polarity)
+ goto not_the_end;
}
- break;
+ *picture = *s->picture_ptr;
+ *data_size = sizeof(AVFrame);
+
+ if(!s->lossless){
+ picture->quality= FFMAX3(s->qscale[0], s->qscale[1], s->qscale[2]);
+ picture->qstride= 0;
+ picture->qscale_table= s->qscale_table;
+ memset(picture->qscale_table, picture->quality, (s->width+15)/16);
+ if(avctx->debug & FF_DEBUG_QP)
+ av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
+ picture->quality*= FF_QP2LAMBDA;
+ }
+
+ goto the_end;
case SOS:
if (!s->got_picture) {
av_log(avctx, AV_LOG_WARNING, "Can not process SOS before SOF, skipping\n");
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index 5d319c5da2..e9556e9b0f 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -993,7 +993,7 @@ static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dm
}
/**
- @param P[10][2] a list of candidate mvs to check before starting the
+ @param P a list of candidate mvs to check before starting the
iterative search. If one of the candidates is close to the optimal mv, then
it takes fewer iterations. And it increases the chance that we find the
optimal mv.
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index bd858a59b7..e7bae574e8 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -1965,8 +1965,6 @@ static int slice_decode_thread(AVCodecContext *c, void *arg){
if(mb_y < 0 || mb_y >= s->end_mb_y)
return -1;
}
-
- return 0; //not reached
}
/**
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 888d8130e9..d32b9e63fb 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -28,6 +28,7 @@
*/
#include "libavutil/intmath.h"
+#include "libavutil/mathematics.h"
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index 111ce6193f..852e34981f 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -440,7 +440,6 @@ static int pcm_decode_frame(AVCodecContext *avctx,
default:
av_log(avctx, AV_LOG_ERROR, "PCM DVD unsupported sample depth\n");
return -1;
- break;
}
samples = (short *) dst_int32_t;
break;
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index f593d0a164..37ba8261c4 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -471,7 +471,6 @@ static int shorten_decode_frame(AVCodecContext *avctx,
s->cur_chan = 0;
goto frame_done;
}
- break;
}
break;
case FN_VERBATIM:
@@ -489,11 +488,9 @@ static int shorten_decode_frame(AVCodecContext *avctx,
case FN_QUIT:
*data_size = 0;
return buf_size;
- break;
default:
av_log(avctx, AV_LOG_ERROR, "unknown shorten function %d\n", cmd);
return -1;
- break;
}
}
frame_done:
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 43a71904eb..7b763af67f 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -33,42 +33,6 @@
#undef NDEBUG
#include <assert.h>
-static const int8_t quant3[256]={
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
-};
-static const int8_t quant3b[256]={
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-};
static const int8_t quant3bA[256]={
0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
@@ -87,153 +51,7 @@ static const int8_t quant3bA[256]={
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
-static const int8_t quant5[256]={
- 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
-};
-static const int8_t quant7[256]={
- 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
-};
-static const int8_t quant9[256]={
- 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
-};
-static const int8_t quant11[256]={
- 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
-};
-static const int8_t quant13[256]={
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
-};
-#if 0 //64*cubic
-static const uint8_t obmc32[1024]={
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
- 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
- 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
- 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
- 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
- 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
- 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
- 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
- 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
- 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
- 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
- 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
- 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
- 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
- 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
- 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
- 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
- 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
- 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
- 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
- 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
- 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
- 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
- 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
- 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
- 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
- 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
- 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
- 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
- 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-//error:0.000022
-};
-static const uint8_t obmc16[256]={
- 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
- 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
- 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
- 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
- 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
- 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
- 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
- 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
- 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
- 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
- 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
- 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
- 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
- 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
- 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
- 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
-//error:0.000033
-};
-#elif 1 // 64*linear
static const uint8_t obmc32[1024]={
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
@@ -288,62 +106,6 @@ static const uint8_t obmc16[256]={
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
//error:0.000015
};
-#else //64*cos
-static const uint8_t obmc32[1024]={
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
- 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
- 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
- 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
- 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
- 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
- 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
- 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
- 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
- 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
- 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
- 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
- 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
- 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
- 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
- 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
- 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
- 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
- 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
- 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
- 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
- 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
- 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
- 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
- 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
- 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
- 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
- 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-//error:0.000022
-};
-static const uint8_t obmc16[256]={
- 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
- 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
- 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
- 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
- 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
- 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
- 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
- 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
- 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
- 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
- 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
- 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
- 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
- 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
- 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
- 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
-//error:0.000022
-};
-#endif /* 0 */
//linear *64
static const uint8_t obmc8[64]={
@@ -509,7 +271,6 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
if(v){
const int a= FFABS(v);
const int e= av_log2(a);
-#if 1
const int el= FFMIN(e, 10);
put_rac(c, state+0, 0);
@@ -530,35 +291,6 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
if(is_signed)
put_rac(c, state+11 + el, v < 0); //11..21
-#else
-
- put_rac(c, state+0, 0);
- if(e<=9){
- for(i=0; i<e; i++){
- put_rac(c, state+1+i, 1); //1..10
- }
- put_rac(c, state+1+i, 0);
-
- for(i=e-1; i>=0; i--){
- put_rac(c, state+22+i, (a>>i)&1); //22..31
- }
-
- if(is_signed)
- put_rac(c, state+11 + e, v < 0); //11..21
- }else{
- for(i=0; i<e; i++){
- put_rac(c, state+1+FFMIN(i,9), 1); //1..10
- }
- put_rac(c, state+1+9, 0);
-
- for(i=e-1; i>=0; i--){
- put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
- }
-
- if(is_signed)
- put_rac(c, state+11 + 10, v < 0); //11..21
- }
-#endif /* 1 */
}else{
put_rac(c, state+0, 1);
}
@@ -789,14 +521,6 @@ static int alloc_blocks(SnowContext *s){
return 0;
}
-static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
- uint8_t *bytestream= d->bytestream;
- uint8_t *bytestream_start= d->bytestream_start;
- *d= *s;
- d->bytestream= bytestream;
- d->bytestream_start= bytestream_start;
-}
-
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
const int w= s->b_width << s->block_max_depth;
const int rem_depth= s->block_max_depth - level;
@@ -1323,40 +1047,6 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
block[3]= ptmp;
pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
}
-#if 0
- for(y=0; y<b_h; y++){
- for(x=0; x<b_w; x++){
- int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
- if(add) dst[x + y*dst_stride] += v;
- else dst[x + y*dst_stride] -= v;
- }
- }
- for(y=0; y<b_h; y++){
- uint8_t *obmc2= obmc + (obmc_stride>>1);
- for(x=0; x<b_w; x++){
- int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
- if(add) dst[x + y*dst_stride] += v;
- else dst[x + y*dst_stride] -= v;
- }
- }
- for(y=0; y<b_h; y++){
- uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
- for(x=0; x<b_w; x++){
- int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
- if(add) dst[x + y*dst_stride] += v;
- else dst[x + y*dst_stride] -= v;
- }
- }
- for(y=0; y<b_h; y++){
- uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
- uint8_t *obmc4= obmc3+ (obmc_stride>>1);
- for(x=0; x<b_w; x++){
- int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
- if(add) dst[x + y*dst_stride] += v;
- else dst[x + y*dst_stride] -= v;
- }
- }
-#else
if(sliced){
s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
}else{
@@ -1387,7 +1077,6 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
}
}
}
-#endif /* 0 */
}
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
@@ -4008,6 +3697,7 @@ AVCodec ff_snow_encoder = {
#undef printf
#include "libavutil/lfg.h"
+#include "libavutil/mathematics.h"
int main(void){
int width=256;
@@ -4042,27 +3732,6 @@ int main(void){
for(i=0; i<width*height; i++)
if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
-#if 0
- printf("testing AC coder\n");
- memset(s.header_state, 0, sizeof(s.header_state));
- ff_init_range_encoder(&s.c, buffer[0], 256*256);
- ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
-
- for(i=-256; i<256; i++){
- put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
- }
- ff_rac_terminate(&s.c);
-
- memset(s.header_state, 0, sizeof(s.header_state));
- ff_init_range_decoder(&s.c, buffer[0], 256*256);
- ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
-
- for(i=-256; i<256; i++){
- int j;
- j= get_symbol(&s.c, s.header_state, 1);
- if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
- }
-#endif
{
int level, orientation, x, y;
int64_t errors[8][4];
@@ -4120,7 +3789,6 @@ int main(void){
buf+=stride>>1;
memset(buffer[0], 0, sizeof(int)*width*height);
-#if 1
for(y=0; y<height; y++){
for(x=0; x<width; x++){
int tab[4]={0,2,3,1};
@@ -4128,15 +3796,6 @@ int main(void){
}
}
ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
-#else
- for(y=0; y<h; y++){
- for(x=0; x<w; x++){
- buf[x + y*stride ]=169;
- buf[x + y*stride-w]=64;
- }
- }
- ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
-#endif
for(y=0; y<height; y++){
for(x=0; x<width; x++){
int64_t d= buffer[0][x + y*width];
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 30cd0f102f..ff0c668b67 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -591,7 +591,7 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
}
}
- ff_h264_write_back_intra_pred_mode(h);
+ write_back_intra_pred_mode(h);
if (mb_type == 8) {
ff_h264_check_intra4x4_pred_mode(h);
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index bc08e350cf..0c84965883 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -27,6 +27,7 @@
#include "libavutil/avstring.h"
#include "libavutil/crc.h"
+#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"
#include "libavutil/audioconvert.h"
#include "libavutil/imgutils.h"
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 082d8e5829..c7d5a56af4 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1039,7 +1039,7 @@ static const uint8_t subpel_idx[3][8] = {
* @param s VP8 decoding context
* @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
* @param dst target buffer for block data at block position
- * @param src reference picture buffer at origin (0, 0)
+ * @param ref reference picture buffer at origin (0, 0)
* @param mv motion vector (relative to block position) to get pixel data from
* @param x_off horizontal position of block from origin (0, 0)
* @param y_off vertical position of block from origin (0, 0)
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 7579cb16ae..09f65e8691 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -47,6 +47,7 @@ MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
x86/fmtconvert.o \
x86/h264_chromamc.o \
x86/h264_chromamc_10bit.o \
+ x86/h264_qpel_10bit.o \
$(YASM-OBJS-yes)
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 80bb6cd26c..13530418ff 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2530,44 +2530,56 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
}
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
- c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
-
- SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
+ c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU
+
+ SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, );
if (!high_bit_depth) {
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
}
+#if HAVE_YASM
+ else if (bit_depth == 10) {
+#if !ARCH_X86_64
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_);
+#endif
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
+ }
+#endif
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
#if HAVE_YASM
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
@@ -2627,26 +2639,26 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
}
- SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, );
if (!high_bit_depth) {
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, );
}
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
#if HAVE_YASM
if (!high_bit_depth) {
@@ -2690,7 +2702,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 3, sse2);
}
#if HAVE_YASM
+#define H264_QPEL_FUNCS_10(x, y, CPU)\
+ c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU;\
+ c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU;\
+ c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU;\
+ c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU;
if (bit_depth == 10) {
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
+ H264_QPEL_FUNCS_10(1, 0, sse2_cache64)
+ H264_QPEL_FUNCS_10(2, 0, sse2_cache64)
+ H264_QPEL_FUNCS_10(3, 0, sse2_cache64)
+
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2;
}
@@ -2712,6 +2737,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
}
+ else if (bit_depth == 10) {
+ H264_QPEL_FUNCS_10(1, 0, ssse3_cache64)
+ H264_QPEL_FUNCS_10(2, 0, ssse3_cache64)
+ H264_QPEL_FUNCS_10(3, 0, ssse3_cache64)
+ }
#if HAVE_YASM
if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
@@ -2807,6 +2837,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_AVX && HAVE_YASM
if (mm_flags & AV_CPU_FLAG_AVX) {
if (bit_depth == 10) {
+ //AVX implies !cache64.
+ //TODO: Port cache(32|64) detection from x264.
+ H264_QPEL_FUNCS_10(1, 0, sse2)
+ H264_QPEL_FUNCS_10(2, 0, sse2)
+ H264_QPEL_FUNCS_10(3, 0, sse2)
+
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
}
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
new file mode 100644
index 0000000000..15dd72ca36
--- /dev/null
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -0,0 +1,891 @@
+;*****************************************************************************
+;* MMX/SSE2/AVX-optimized 10-bit H.264 qpel code
+;*****************************************************************************
+;* Copyright (C) 2011 x264 project
+;*
+;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA 32
+
+cextern pw_16
+cextern pw_1
+cextern pb_0
+
+pw_pixel_max: times 8 dw ((1 << 10)-1)
+
+pad10: times 8 dw 10*1023
+pad20: times 8 dw 20*1023
+pad30: times 8 dw 30*1023
+depad: times 4 dd 32*20*1023 + 512
+depad2: times 8 dw 20*1023 + 16*1022 + 16
+unpad: times 8 dw 16*1022/32 ; needs to be mod 16
+
+tap1: times 4 dw 1, -5
+tap2: times 4 dw 20, 20
+tap3: times 4 dw -5, 1
+pd_0f: times 4 dd 0xffff
+
+SECTION .text
+
+
+%macro AVG_MOV 2
+ pavgw %2, %1
+ mova %1, %2
+%endmacro
+
+%macro ADDW 3
+%if mmsize == 8
+ paddw %1, %2
+%else
+ movu %3, %2
+ paddw %1, %3
+%endif
+%endmacro
+
+%macro FILT_H 4
+ paddw %1, %4
+ psubw %1, %2 ; a-b
+ psraw %1, 2 ; (a-b)/4
+ psubw %1, %2 ; (a-b)/4-b
+ paddw %1, %3 ; (a-b)/4-b+c
+ psraw %1, 2 ; ((a-b)/4-b+c)/4
+ paddw %1, %3 ; ((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16
+%endmacro
+
+%macro PRELOAD_V 0
+ lea r3, [r2*3]
+ sub r1, r3
+ movu m0, [r1+r2]
+ movu m1, [r1+r2*2]
+ add r1, r3
+ movu m2, [r1]
+ movu m3, [r1+r2]
+ movu m4, [r1+r2*2]
+ add r1, r3
+%endmacro
+
+%macro FILT_V 8
+ movu %6, [r1]
+ paddw %1, %6
+ mova %7, %2
+ paddw %7, %5
+ mova %8, %3
+ paddw %8, %4
+ FILT_H %1, %7, %8, [pw_16]
+ psraw %1, 1
+ CLIPW %1, [pb_0], [pw_pixel_max]
+%endmacro
+
+%macro MC 1
+%define OP_MOV mova
+INIT_MMX
+%1 mmxext, put, 4
+INIT_XMM
+%1 sse2 , put, 8
+
+%define OP_MOV AVG_MOV
+INIT_MMX
+%1 mmxext, avg, 4
+INIT_XMM
+%1 sse2 , avg, 8
+%endmacro
+
+%macro MCAxA 8
+%ifdef ARCH_X86_64
+%ifnidn %1,mmxext
+MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
+%endif
+%else
+MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
+%endif
+%endmacro
+
+%macro MCAxA_OP 8
+cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
+%ifdef ARCH_X86_32
+ call stub_%2_h264_qpel%4_%3_10_%1
+ mov r0, r0m
+ mov r1, r1m
+ add r0, %4*2
+ add r1, %4*2
+ call stub_%2_h264_qpel%4_%3_10_%1
+ mov r0, r0m
+ mov r1, r1m
+ lea r0, [r0+r2*%4]
+ lea r1, [r1+r2*%4]
+ call stub_%2_h264_qpel%4_%3_10_%1
+ mov r0, r0m
+ mov r1, r1m
+ lea r0, [r0+r2*%4+%4*2]
+ lea r1, [r1+r2*%4+%4*2]
+ call stub_%2_h264_qpel%4_%3_10_%1
+ RET
+%else ; ARCH_X86_64
+ mov r10, r0
+ mov r11, r1
+ call stub_%2_h264_qpel%4_%3_10_%1
+ lea r0, [r10+%4*2]
+ lea r1, [r11+%4*2]
+ call stub_%2_h264_qpel%4_%3_10_%1
+ lea r0, [r10+r2*%4]
+ lea r1, [r11+r2*%4]
+ call stub_%2_h264_qpel%4_%3_10_%1
+ lea r0, [r10+r2*%4+%4*2]
+ lea r1, [r11+r2*%4+%4*2]
+%ifndef UNIX64 ; fall through to function
+ call stub_%2_h264_qpel%4_%3_10_%1
+ RET
+%endif
+%endif
+%endmacro
+
+;cpu, put/avg, mc, 4/8, ...
+%macro cglobal_mc 7
+%assign i %4*2
+MCAxA %1, %2, %3, %4, i, %5,%6,%7
+
+cglobal %2_h264_qpel%4_%3_10_%1, %5,%6,%7
+%ifndef UNIX64 ; no prologue or epilogue for UNIX64
+ call stub_%2_h264_qpel%4_%3_10_%1
+ RET
+%endif
+
+stub_%2_h264_qpel%4_%3_10_%1:
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc00(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro COPY4 0
+ movu m0, [r1 ]
+ OP_MOV [r0 ], m0
+ movu m0, [r1+r2 ]
+ OP_MOV [r0+r2 ], m0
+ movu m0, [r1+r2*2]
+ OP_MOV [r0+r2*2], m0
+ movu m0, [r1+r3 ]
+ OP_MOV [r0+r3 ], m0
+%endmacro
+
+%macro MC00 1
+INIT_MMX
+cglobal_mc mmxext, %1, mc00, 4, 3,4,0
+ lea r3, [r2*3]
+ COPY4
+ ret
+
+INIT_XMM
+cglobal %1_h264_qpel8_mc00_10_sse2, 3,4
+ lea r3, [r2*3]
+ COPY4
+ lea r0, [r0+r2*4]
+ lea r1, [r1+r2*4]
+ COPY4
+ RET
+
+cglobal %1_h264_qpel16_mc00_10_sse2, 3,4
+ mov r3d, 8
+.loop:
+ movu m0, [r1 ]
+ movu m1, [r1 +16]
+ OP_MOV [r0 ], m0
+ OP_MOV [r0 +16], m1
+ movu m0, [r1+r2 ]
+ movu m1, [r1+r2+16]
+ OP_MOV [r0+r2 ], m0
+ OP_MOV [r0+r2+16], m1
+ lea r0, [r0+r2*2]
+ lea r1, [r1+r2*2]
+ dec r3d
+ jg .loop
+ REP_RET
+%endmacro
+
+%define OP_MOV mova
+MC00 put
+
+%define OP_MOV AVG_MOV
+MC00 avg
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc20(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC_CACHE 1
+%define OP_MOV mova
+%define PALIGNR PALIGNR_MMX
+INIT_MMX
+%1 mmxext , put, 4
+INIT_XMM
+%1 sse2_cache64 , put, 8
+%define PALIGNR PALIGNR_SSSE3
+%1 ssse3_cache64, put, 8
+%1 sse2 , put, 8, 0
+
+%define OP_MOV AVG_MOV
+%define PALIGNR PALIGNR_MMX
+INIT_MMX
+%1 mmxext , avg, 4
+INIT_XMM
+%1 sse2_cache64 , avg, 8
+%define PALIGNR PALIGNR_SSSE3
+%1 ssse3_cache64, avg, 8
+%1 sse2 , avg, 8, 0
+%endmacro
+
+%macro MC20 3-4
+cglobal_mc %1, %2, mc20, %3, 3,4,9
+ mov r3d, %3
+ mova m1, [pw_pixel_max]
+%if num_mmregs > 8
+ mova m8, [pw_16]
+ %define p16 m8
+%else
+ %define p16 [pw_16]
+%endif
+.nextrow
+%if %0 == 4
+ movu m2, [r1-4]
+ movu m3, [r1-2]
+ movu m4, [r1+0]
+ ADDW m2, [r1+6], m5
+ ADDW m3, [r1+4], m5
+ ADDW m4, [r1+2], m5
+%else ; movu is slow on these processors
+%if mmsize==16
+ movu m2, [r1-4]
+ movu m0, [r1+6]
+ mova m6, m0
+ psrldq m0, 6
+
+ paddw m6, m2
+ PALIGNR m3, m0, m2, 2, m5
+ PALIGNR m7, m0, m2, 8, m5
+ paddw m3, m7
+ PALIGNR m4, m0, m2, 4, m5
+ PALIGNR m7, m0, m2, 6, m5
+ paddw m4, m7
+ SWAP 2, 6
+%else
+ movu m2, [r1-4]
+ movu m6, [r1+4]
+ PALIGNR m3, m6, m2, 2, m5
+ paddw m3, m6
+ PALIGNR m4, m6, m2, 4, m5
+ PALIGNR m7, m6, m2, 6, m5
+ paddw m4, m7
+ paddw m2, [r1+6]
+%endif
+%endif
+
+ FILT_H m2, m3, m4, p16
+ psraw m2, 1
+ pxor m0, m0
+ CLIPW m2, m0, m1
+ OP_MOV [r0], m2
+ add r0, r2
+ add r1, r2
+ dec r3d
+ jg .nextrow
+ rep ret
+%endmacro
+
+MC_CACHE MC20
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc30(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC30 3-4
+cglobal_mc %1, %2, mc30, %3, 3,5,9
+ lea r4, [r1+2]
+ jmp stub_%2_h264_qpel%3_mc10_10_%1.body
+%endmacro
+
+MC_CACHE MC30
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc10(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC10 3-4
+cglobal_mc %1, %2, mc10, %3, 3,5,9
+ mov r4, r1
+.body
+ mov r3d, %3
+ mova m1, [pw_pixel_max]
+%if num_mmregs > 8
+ mova m8, [pw_16]
+ %define p16 m8
+%else
+ %define p16 [pw_16]
+%endif
+.nextrow
+%if %0 == 4
+ movu m2, [r1-4]
+ movu m3, [r1-2]
+ movu m4, [r1+0]
+ ADDW m2, [r1+6], m5
+ ADDW m3, [r1+4], m5
+ ADDW m4, [r1+2], m5
+%else ; movu is slow on these processors
+%if mmsize==16
+ movu m2, [r1-4]
+ movu m0, [r1+6]
+ mova m6, m0
+ psrldq m0, 6
+
+ paddw m6, m2
+ PALIGNR m3, m0, m2, 2, m5
+ PALIGNR m7, m0, m2, 8, m5
+ paddw m3, m7
+ PALIGNR m4, m0, m2, 4, m5
+ PALIGNR m7, m0, m2, 6, m5
+ paddw m4, m7
+ SWAP 2, 6
+%else
+ movu m2, [r1-4]
+ movu m6, [r1+4]
+ PALIGNR m3, m6, m2, 2, m5
+ paddw m3, m6
+ PALIGNR m4, m6, m2, 4, m5
+ PALIGNR m7, m6, m2, 6, m5
+ paddw m4, m7
+ paddw m2, [r1+6]
+%endif
+%endif
+
+ FILT_H m2, m3, m4, p16
+ psraw m2, 1
+ pxor m0, m0
+ CLIPW m2, m0, m1
+ movu m3, [r4]
+ pavgw m2, m3
+ OP_MOV [r0], m2
+ add r0, r2
+ add r1, r2
+ add r4, r2
+ dec r3d
+ jg .nextrow
+ rep ret
+%endmacro
+
+MC_CACHE MC10
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc02(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro V_FILT 11
+v_filt%9_%10_10_%11:
+ add r4, r2
+.no_addr4:
+ FILT_V m0, m1, m2, m3, m4, m5, m6, m7
+ add r1, r2
+ add r0, r2
+ ret
+%endmacro
+
+INIT_MMX
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 4
+V_FILT m0, m1, m2, m3, m4, m5, m6, m7, 4, i, mmxext
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+
+INIT_XMM
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 6
+V_FILT m0, m1, m2, m3, m4, m5, m6, m7, 8, i, sse2
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+
+%macro MC02 3
+cglobal_mc %1, %2, mc02, %3, 3,4,8
+ PRELOAD_V
+
+ sub r0, r2
+%assign j 0
+%rep %3
+ %assign i (j % 6)
+ call v_filt%3_ %+ i %+ _10_%1.no_addr4
+ OP_MOV [r0], m0
+ SWAP 0,1,2,3,4,5
+ %assign j j+1
+%endrep
+ ret
+%endmacro
+
+MC MC02
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc01(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC01 3
+cglobal_mc %1, %2, mc01, %3, 3,5,8
+ mov r4, r1
+.body
+ PRELOAD_V
+
+ sub r4, r2
+ sub r0, r2
+%assign j 0
+%rep %3
+ %assign i (j % 6)
+ call v_filt%3_ %+ i %+ _10_%1
+ movu m7, [r4]
+ pavgw m0, m7
+ OP_MOV [r0], m0
+ SWAP 0,1,2,3,4,5
+ %assign j j+1
+%endrep
+ ret
+%endmacro
+
+MC MC01
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc03(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC03 3
+cglobal_mc %1, %2, mc03, %3, 3,5,8
+ lea r4, [r1+r2]
+ jmp stub_%2_h264_qpel%3_mc01_10_%1.body
+%endmacro
+
+MC MC03
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc11(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro H_FILT_AVG 3-4
+h_filt%2_%3_10_%1:
+;FILT_H with fewer registers and averaged with the FILT_V result
+;m6,m7 are tmp registers, m0 is the FILT_V result, the rest are to be used next in the next iteration
+;unfortunately I need three registers, so m5 will have to be re-read from memory
+ movu m5, [r4-4]
+ ADDW m5, [r4+6], m7
+ movu m6, [r4-2]
+ ADDW m6, [r4+4], m7
+ paddw m5, [pw_16]
+ psubw m5, m6 ; a-b
+ psraw m5, 2 ; (a-b)/4
+ psubw m5, m6 ; (a-b)/4-b
+ movu m6, [r4+0]
+ ADDW m6, [r4+2], m7
+ paddw m5, m6 ; (a-b)/4-b+c
+ psraw m5, 2 ; ((a-b)/4-b+c)/4
+ paddw m5, m6 ; ((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16
+ psraw m5, 1
+ CLIPW m5, [pb_0], [pw_pixel_max]
+;avg FILT_V, FILT_H
+ pavgw m0, m5
+%if %0!=4
+ movu m5, [r1+r5]
+%endif
+ ret
+%endmacro
+
+INIT_MMX
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 3
+H_FILT_AVG mmxext, 4, i
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+H_FILT_AVG mmxext, 4, i, 0
+
+INIT_XMM
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 6
+%if i==1
+H_FILT_AVG sse2, 8, i, 0
+%else
+H_FILT_AVG sse2, 8, i
+%endif
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+
+%macro MC11 3
+; this REALLY needs x86_64
+cglobal_mc %1, %2, mc11, %3, 3,6,8
+ mov r4, r1
+.body
+ PRELOAD_V
+
+ sub r0, r2
+ sub r4, r2
+ mov r5, r2
+ neg r5
+%assign j 0
+%rep %3
+ %assign i (j % 6)
+ call v_filt%3_ %+ i %+ _10_%1
+ call h_filt%3_ %+ i %+ _10_%1
+%if %3==8 && i==1
+ movu m5, [r1+r5]
+%endif
+ OP_MOV [r0], m0
+ SWAP 0,1,2,3,4,5
+ %assign j j+1
+%endrep
+ ret
+%endmacro
+
+MC MC11
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc31(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC31 3
+cglobal_mc %1, %2, mc31, %3, 3,6,8
+ mov r4, r1
+ add r1, 2
+ jmp stub_%2_h264_qpel%3_mc11_10_%1.body
+%endmacro
+
+MC MC31
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc13(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC13 3
+cglobal_mc %1, %2, mc13, %3, 3,7,12
+ lea r4, [r1+r2]
+ jmp stub_%2_h264_qpel%3_mc11_10_%1.body
+%endmacro
+
+MC MC13
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc33(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC33 3
+cglobal_mc %1, %2, mc33, %3, 3,6,8
+ lea r4, [r1+r2]
+ add r1, 2
+ jmp stub_%2_h264_qpel%3_mc11_10_%1.body
+%endmacro
+
+MC MC33
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc22(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro FILT_H2 3
+ psubw %1, %2 ; a-b
+ psubw %2, %3 ; b-c
+ psllw %2, 2
+ psubw %1, %2 ; a-5*b+4*c
+ psllw %3, 4
+ paddw %1, %3 ; a-5*b+20*c
+%endmacro
+
+%macro FILT_VNRD 8
+ movu %6, [r1]
+ paddw %1, %6
+ mova %7, %2
+ paddw %7, %5
+ mova %8, %3
+ paddw %8, %4
+ FILT_H2 %1, %7, %8
+%endmacro
+
+%macro HV 2
+%ifidn %1,sse2
+%define PAD 12
+%define COUNT 2
+%else
+%define PAD 0
+%define COUNT 3
+%endif
+put_hv%2_10_%1:
+ neg r2 ; This actually saves instructions
+ lea r1, [r1+r2*2-mmsize+PAD]
+ lea r4, [rsp+PAD+gprsize]
+ mov r3d, COUNT
+.v_loop:
+ movu m0, [r1]
+ sub r1, r2
+ movu m1, [r1]
+ sub r1, r2
+ movu m2, [r1]
+ sub r1, r2
+ movu m3, [r1]
+ sub r1, r2
+ movu m4, [r1]
+ sub r1, r2
+%assign i 0
+%rep %2-1
+ FILT_VNRD m0, m1, m2, m3, m4, m5, m6, m7
+ psubw m0, [pad20]
+ movu [r4+i*mmsize*3], m0
+ sub r1, r2
+ SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+ FILT_VNRD m0, m1, m2, m3, m4, m5, m6, m7
+ psubw m0, [pad20]
+ movu [r4+i*mmsize*3], m0
+ add r4, mmsize
+ lea r1, [r1+r2*8+mmsize]
+%if %2==8
+ lea r1, [r1+r2*4]
+%endif
+ dec r3d
+ jg .v_loop
+ neg r2
+ ret
+%endmacro
+
+INIT_MMX
+HV mmxext, 4
+INIT_XMM
+HV sse2 , 8
+
+%macro H_LOOP 2
+%if num_mmregs > 8
+ %define s1 m8
+ %define s2 m9
+ %define s3 m10
+ %define d1 m11
+%else
+ %define s1 [tap1]
+ %define s2 [tap2]
+ %define s3 [tap3]
+ %define d1 [depad]
+%endif
+h%2_loop_op_%1:
+ movu m1, [r1+mmsize-4]
+ movu m2, [r1+mmsize-2]
+ mova m3, [r1+mmsize+0]
+ movu m4, [r1+mmsize+2]
+ movu m5, [r1+mmsize+4]
+ movu m6, [r1+mmsize+6]
+%if num_mmregs > 8
+ pmaddwd m1, s1
+ pmaddwd m2, s1
+ pmaddwd m3, s2
+ pmaddwd m4, s2
+ pmaddwd m5, s3
+ pmaddwd m6, s3
+ paddd m1, d1
+ paddd m2, d1
+%else
+ mova m0, s1
+ pmaddwd m1, m0
+ pmaddwd m2, m0
+ mova m0, s2
+ pmaddwd m3, m0
+ pmaddwd m4, m0
+ mova m0, s3
+ pmaddwd m5, m0
+ pmaddwd m6, m0
+ mova m0, d1
+ paddd m1, m0
+ paddd m2, m0
+%endif
+ paddd m3, m5
+ paddd m4, m6
+ paddd m1, m3
+ paddd m2, m4
+ psrad m1, 10
+ psrad m2, 10
+ pslld m2, 16
+ pand m1, [pd_0f]
+ por m1, m2
+%if num_mmregs <= 8
+ pxor m0, m0
+%endif
+ CLIPW m1, m0, m7
+ add r1, mmsize*3
+ ret
+%endmacro
+
+INIT_MMX
+H_LOOP mmxext, 4
+INIT_XMM
+H_LOOP sse2 , 8
+
+%macro MC22 3
+cglobal_mc %1, %2, mc22, %3, 3,7,12
+%define PAD mmsize*8*4*2 ; SIZE*16*4*sizeof(pixel)
+ mov r6, rsp ; backup stack pointer
+ and rsp, ~(mmsize-1) ; align stack
+ sub rsp, PAD
+
+ call put_hv%3_10_%1
+
+ mov r3d, %3
+ mova m7, [pw_pixel_max]
+%if num_mmregs > 8
+ pxor m0, m0
+ mova m8, [tap1]
+ mova m9, [tap2]
+ mova m10, [tap3]
+ mova m11, [depad]
+%endif
+ mov r1, rsp
+.h_loop:
+ call h%3_loop_op_%1
+
+ OP_MOV [r0], m1
+ add r0, r2
+ dec r3d
+ jg .h_loop
+
+ mov rsp, r6 ; restore stack pointer
+ ret
+%endmacro
+
+MC MC22
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc12(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC12 3
+cglobal_mc %1, %2, mc12, %3, 3,7,12
+%define PAD mmsize*8*4*2 ; SIZE*16*4*sizeof(pixel)
+ mov r6, rsp ; backup stack pointer
+ and rsp, ~(mmsize-1) ; align stack
+ sub rsp, PAD
+
+ call put_hv%3_10_%1
+
+ xor r4d, r4d
+.body
+ mov r3d, %3
+ pxor m0, m0
+ mova m7, [pw_pixel_max]
+%if num_mmregs > 8
+ mova m8, [tap1]
+ mova m9, [tap2]
+ mova m10, [tap3]
+ mova m11, [depad]
+%endif
+ mov r1, rsp
+.h_loop:
+ call h%3_loop_op_%1
+
+ movu m3, [r1+r4-2*mmsize] ; movu needed for mc32, etc
+ paddw m3, [depad2]
+ psrlw m3, 5
+ psubw m3, [unpad]
+ CLIPW m3, m0, m7
+ pavgw m1, m3
+
+ OP_MOV [r0], m1
+ add r0, r2
+ dec r3d
+ jg .h_loop
+
+ mov rsp, r6 ; restore stack pointer
+ ret
+%endmacro
+
+MC MC12
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc32(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC32 3
+cglobal_mc %1, %2, mc32, %3, 3,7,12
+%define PAD mmsize*8*3*2 ; SIZE*16*4*sizeof(pixel)
+ mov r6, rsp ; backup stack pointer
+ and rsp, ~(mmsize-1) ; align stack
+ sub rsp, PAD
+
+ call put_hv%3_10_%1
+
+ mov r4d, 2 ; sizeof(pixel)
+ jmp stub_%2_h264_qpel%3_mc12_10_%1.body
+%endmacro
+
+MC MC32
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc21(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro H_NRD 2
+put_h%2_10_%1:
+ add rsp, gprsize
+ mov r3d, %2
+ xor r4d, r4d
+ mova m6, [pad20]
+.nextrow
+ movu m2, [r5-4]
+ movu m3, [r5-2]
+ movu m4, [r5+0]
+ ADDW m2, [r5+6], m5
+ ADDW m3, [r5+4], m5
+ ADDW m4, [r5+2], m5
+
+ FILT_H2 m2, m3, m4
+ psubw m2, m6
+ mova [rsp+r4], m2
+ add r4d, mmsize*3
+ add r5, r2
+ dec r3d
+ jg .nextrow
+ sub rsp, gprsize
+ ret
+%endmacro
+
+INIT_MMX
+H_NRD mmxext, 4
+INIT_XMM
+H_NRD sse2 , 8
+
+%macro MC21 3
+cglobal_mc %1, %2, mc21, %3, 3,7,12
+ mov r5, r1
+.body
+%define PAD mmsize*8*3*2 ; SIZE*16*4*sizeof(pixel)
+ mov r6, rsp ; backup stack pointer
+ and rsp, ~(mmsize-1) ; align stack
+
+ sub rsp, PAD
+ call put_h%3_10_%1
+
+ sub rsp, PAD
+ call put_hv%3_10_%1
+
+ mov r4d, PAD-mmsize ; H buffer
+ jmp stub_%2_h264_qpel%3_mc12_10_%1.body
+%endmacro
+
+MC MC21
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc23(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC23 3
+cglobal_mc %1, %2, mc23, %3, 3,7,12
+ lea r5, [r1+r2]
+ jmp stub_%2_h264_qpel%3_mc21_10_%1.body
+%endmacro
+
+MC MC23
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index f5af44e82f..4dac05cfa3 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ * Copyright (c) 2011 Daniel Kang
*
* This file is part of FFmpeg.
*
@@ -1199,3 +1200,100 @@ H264_MC_816(H264_MC_HV, sse2)
H264_MC_816(H264_MC_H, ssse3)
H264_MC_816(H264_MC_HV, ssse3)
#endif
+
+
+
+//10bit
+#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
+void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
+ (uint8_t *dst, uint8_t *src, int stride);
+
+#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+#define LUMA_MC_816(DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+LUMA_MC_ALL(10, mc00, mmxext)
+LUMA_MC_ALL(10, mc10, mmxext)
+LUMA_MC_ALL(10, mc20, mmxext)
+LUMA_MC_ALL(10, mc30, mmxext)
+LUMA_MC_ALL(10, mc01, mmxext)
+LUMA_MC_ALL(10, mc11, mmxext)
+LUMA_MC_ALL(10, mc21, mmxext)
+LUMA_MC_ALL(10, mc31, mmxext)
+LUMA_MC_ALL(10, mc02, mmxext)
+LUMA_MC_ALL(10, mc12, mmxext)
+LUMA_MC_ALL(10, mc22, mmxext)
+LUMA_MC_ALL(10, mc32, mmxext)
+LUMA_MC_ALL(10, mc03, mmxext)
+LUMA_MC_ALL(10, mc13, mmxext)
+LUMA_MC_ALL(10, mc23, mmxext)
+LUMA_MC_ALL(10, mc33, mmxext)
+
+LUMA_MC_816(10, mc00, sse2)
+LUMA_MC_816(10, mc10, sse2)
+LUMA_MC_816(10, mc10, sse2_cache64)
+LUMA_MC_816(10, mc10, ssse3_cache64)
+LUMA_MC_816(10, mc20, sse2)
+LUMA_MC_816(10, mc20, sse2_cache64)
+LUMA_MC_816(10, mc20, ssse3_cache64)
+LUMA_MC_816(10, mc30, sse2)
+LUMA_MC_816(10, mc30, sse2_cache64)
+LUMA_MC_816(10, mc30, ssse3_cache64)
+LUMA_MC_816(10, mc01, sse2)
+LUMA_MC_816(10, mc11, sse2)
+LUMA_MC_816(10, mc21, sse2)
+LUMA_MC_816(10, mc31, sse2)
+LUMA_MC_816(10, mc02, sse2)
+LUMA_MC_816(10, mc12, sse2)
+LUMA_MC_816(10, mc22, sse2)
+LUMA_MC_816(10, mc32, sse2)
+LUMA_MC_816(10, mc03, sse2)
+LUMA_MC_816(10, mc13, sse2)
+LUMA_MC_816(10, mc23, sse2)
+LUMA_MC_816(10, mc33, sse2)
+
+#define QPEL16_OPMC(OP, MC, MMX)\
+void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+ src += 8*stride;\
+ dst += 8*stride;\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+}
+
+#define QPEL16_OP(MC, MMX)\
+QPEL16_OPMC(put, MC, MMX)\
+QPEL16_OPMC(avg, MC, MMX)
+
+#define QPEL16(MMX)\
+QPEL16_OP(mc00, MMX)\
+QPEL16_OP(mc01, MMX)\
+QPEL16_OP(mc02, MMX)\
+QPEL16_OP(mc03, MMX)\
+QPEL16_OP(mc10, MMX)\
+QPEL16_OP(mc11, MMX)\
+QPEL16_OP(mc12, MMX)\
+QPEL16_OP(mc13, MMX)\
+QPEL16_OP(mc20, MMX)\
+QPEL16_OP(mc21, MMX)\
+QPEL16_OP(mc22, MMX)\
+QPEL16_OP(mc23, MMX)\
+QPEL16_OP(mc30, MMX)\
+QPEL16_OP(mc31, MMX)\
+QPEL16_OP(mc32, MMX)\
+QPEL16_OP(mc33, MMX)
+
+#if ARCH_X86_32 // ARCH_X86_64 implies sse2+
+QPEL16(mmxext)
+#endif
diff --git a/libavcodec/xsubdec.c b/libavcodec/xsubdec.c
index 9289a2554e..eaf2953172 100644
--- a/libavcodec/xsubdec.c
+++ b/libavcodec/xsubdec.c
@@ -18,6 +18,8 @@
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+
+#include "libavutil/mathematics.h"
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "get_bits.h"