Merge remote-tracking branch 'qatar/master'

* qatar/master: (23 commits)
  applehttp: Properly clean up if unable to probe a segment
  applehttp: Avoid reading uninitialized memory
  fate: Replace misleading "aac" in the name of an ADTS test with "adts".
  fate: Drop pointless "-an" from pictor test command.
  fate: split off image codec FATE tests into their own file
  fate: split off WMA codec FATE tests into their own file
  fate: split off lossless video and audio FATE tests into their own files
  fate: split off qtrle codec FATE tests into their own file
  fate: split off Ut Video codec FATE tests into their own file
  fate: split off screen codec FATE tests into their own file
  fate: split off Real Inc. codec FATE tests into their own file
  fate: split off AC-3 codec FATE tests into their own file
  mpegvideo: remove abort() in ff_find_unused_picture()
  rv40: NEON optimised loop filter strength selection
  rv40: rearrange loop filter functions
  configure: cosmetics: sort some lists where appropriate
  swscale_mmx: drop no longer required parameters from VSCALEX macros
  swscale: Mark yuv2planeX_8_mmx as MMX2; it contains MMX2 instructions.
  build: conditionally compile x86 H.264 chroma optimizations
  v410 encoder and decoder
  ...

Conflicts:
    Changelog
    configure
    doc/developer.texi
    doc/general.texi
    libavcodec/arm/asm.S
    libavcodec/avcodec.h
    libavcodec/v410dec.c
    libavcodec/v410enc.c
    libavcodec/version.h
    libavcodec/x86/Makefile
    libavcodec/x86/dsputil_mmx.c
    libswscale/x86/swscale_mmx.c
    tests/Makefile
    tests/fate2.mak

Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2011-12-14 23:58:10 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2011-12-14 23:58:10 +0100
commit: e462257242fc037c99206457d1316e1ff9e5306f (patch)
tree: 045910517a8b587f7a016b1c46403e1d1021f4f2 /libavcodec
parent: a1be5bc79d7ac4c7c7ed79c4d72b4f1945ecb55c (diff)
parent: 115a57302a7d6661426304bec3a5bc72d0edf4b0 (diff)
download: ffmpeg-e462257242fc037c99206457d1316e1ff9e5306f.tar.gz
15 files changed, 377 insertions, 186 deletions
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S
index d711cb8f11..2daac59242 100644
--- a/libavcodec/arm/asm.S
+++ b/libavcodec/arm/asm.S
@@ -113,10 +113,10 @@ T       add             \rn, \rn, \rm
 T       ldr             \rt, [\rn]
 .endm
 
-.macro  ldr_dpren       rt,  rn,  rm:vararg
-A       ldr             \rt, [\rn, -\rm]
-T       sub             \rt, \rn, \rm
-T       ldr             \rt, [\rt]
+.macro  ldr_dpre        rt,  rn,  rm:vararg
+A       ldr             \rt, [\rn, -\rm]!
+T       sub             \rn, \rn, \rm
+T       ldr             \rt, [\rn]
 .endm
 
 .macro  ldr_post        rt,  rn,  rm:vararg
diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c
index 36d75e6fd8..59dddb6605 100644
--- a/libavcodec/arm/rv40dsp_init_neon.c
+++ b/libavcodec/arm/rv40dsp_init_neon.c
@@ -54,6 +54,13 @@ void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
 void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
 void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
 
+int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, int stride,
+                                        int beta, int beta2, int edge,
+                                        int *p1, int *q1);
+int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, int stride,
+                                        int beta, int beta2, int edge,
+                                        int *p1, int *q1);
+
 void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 {
     c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
@@ -116,4 +123,7 @@ void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 
     c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_neon;
     c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_neon;
+
+    c->rv40_loop_filter_strength[0] = ff_rv40_h_loop_filter_strength_neon;
+    c->rv40_loop_filter_strength[1] = ff_rv40_v_loop_filter_strength_neon;
 }
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index 07ba8428c1..a4313d89f9 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -722,3 +722,89 @@ function ff_rv40_weight_func_8_neon, export=1
         bne             1b
         bx              lr
 endfunc
+
+function ff_rv40_h_loop_filter_strength_neon, export=1
+        pkhbt           r2,  r3,  r2,  lsl #18
+
+        ldr             r3,  [r0]
+        ldr_dpre        r12, r0,  r1
+        teq             r3,  r12
+        beq             1f
+
+        sub             r0,  r0,  r1,  lsl #1
+
+        vld1.32         {d4[]},   [r0,:32], r1  @ -3
+        vld1.32         {d0[]},   [r0,:32], r1  @ -2
+        vld1.32         {d4[1]},  [r0,:32], r1  @ -1
+        vld1.32         {d5[]},   [r0,:32], r1  @  0
+        vld1.32         {d1[]},   [r0,:32], r1  @  1
+        vld1.32         {d5[0]},  [r0,:32], r1  @  2
+
+        vpaddl.u8       q8,  q0                 @ -2, -2, -2, -2,  1,  1,  1,  1
+        vpaddl.u8       q9,  q2                 @ -3, -3, -1, -1,  2,  2,  0,  0
+        vdup.32         d30, r2                 @ beta2, beta << 2
+        vpadd.u16       d16, d16, d17           @ -2, -2,  1,  1
+        vpadd.u16       d18, d18, d19           @ -3, -1,  2,  0
+        vabd.u16        d16, d18, d16
+        vclt.u16        d16, d16, d30
+
+        ldrd            r2,  r3,  [sp, #4]
+        vmovl.u16       q12, d16
+        vtrn.16         d16, d17
+        vshr.u32        q12, q12, #15
+        ldr             r0,  [sp]
+        vst1.32         {d24[1]}, [r2,:32]
+        vst1.32         {d25[1]}, [r3,:32]
+
+        cmp             r0,  #0
+        it              eq
+        bxeq            lr
+
+        vand            d18, d16, d17
+        vtrn.32         d18, d19
+        vand            d18, d18, d19
+        vmov.u16        r0,  d18[0]
+        bx              lr
+1:
+        ldrd            r2,  r3,  [sp, #4]
+        mov             r0,  #0
+        str             r0,  [r2]
+        str             r0,  [r3]
+        bx              lr
+endfunc
+
+function ff_rv40_v_loop_filter_strength_neon, export=1
+        sub             r0,  r0,  #3
+        pkhbt           r2,  r3,  r2,  lsl #18
+
+        vld1.8          {d0},     [r0], r1
+        vld1.8          {d1},     [r0], r1
+        vld1.8          {d2},     [r0], r1
+        vld1.8          {d3},     [r0], r1
+
+        vaddl.u8        q0,  d0,  d1
+        vaddl.u8        q1,  d2,  d3
+        vdup.32         q15, r2
+        vadd.u16        q0,  q0,  q1            @ -3, -2, -1,  0,  1,  2
+        vext.16         q1,  q0,  q0,  #1       @ -2, -1,  0,  1,  2
+        vabd.u16        q0,  q1,  q0
+        vclt.u16        q0,  q0,  q15
+
+        ldrd            r2,  r3,  [sp, #4]
+        vmovl.u16       q1,  d0
+        vext.16         d1,  d0,  d1,  #3
+        vshr.u32        q1,  q1,  #15
+        ldr             r0,  [sp]
+        vst1.32         {d2[1]},  [r2,:32]
+        vst1.32         {d3[1]},  [r3,:32]
+
+        cmp             r0,  #0
+        it              eq
+        bxeq            lr
+
+        vand            d0,  d0,  d1
+        vtrn.16         d0,  d1
+        vand            d0,  d0,  d1
+        vmov.u16        r0,  d0[0]
+        bx              lr
+endfunc
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 4290f8b4be..5a0e0ed380 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -4629,16 +4629,8 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width,
 unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
 
 /**
-<<<<<<< HEAD
- * Logs a generic warning message about a missing feature. This function is
- * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
-||||||| merged common ancestors
- * Logs a generic warning message about a missing feature. This function is
- * intended to be used internally by Libav (libavcodec, libavformat, etc.)
-=======
  * Log a generic warning message about a missing feature. This function is
- * intended to be used internally by Libav (libavcodec, libavformat, etc.)
->>>>>>> qatar/master
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
  * only, and would normally not be used by applications.
  * @param[in] avc a pointer to an arbitrary struct of which the first field is
  * a pointer to an AVClass struct
diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c
index 9b34690e17..ff3d05c514 100644
--- a/libavcodec/h261dec.c
+++ b/libavcodec/h261dec.c
@@ -572,6 +572,8 @@ retry:
     //we need to set current_picture_ptr before reading the header, otherwise we cannot store anyting im there
     if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
         int i= ff_find_unused_picture(s, 0);
+        if (i < 0)
+            return i;
         s->current_picture_ptr= &s->picture[i];
     }
 
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 9bad8e6c74..0d741d80c2 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -410,6 +410,8 @@ retry:
      * otherwise we cannot store anyting in there */
     if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
         int i= ff_find_unused_picture(s, 0);
+        if (i < 0)
+            return i;
         s->current_picture_ptr= &s->picture[i];
     }
 
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 2b792642c5..8924046dc4 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1095,21 +1095,7 @@ int ff_find_unused_picture(MpegEncContext *s, int shared)
         }
     }
 
-    av_log(s->avctx, AV_LOG_FATAL,
-           "Internal error, picture buffer overflow\n");
-    /* We could return -1, but the codec would crash trying to draw into a
-     * non-existing frame anyway. This is safer than waiting for a random crash.
-     * Also the return of this is never useful, an encoder must only allocate
-     * as much as allowed in the specification. This has no relationship to how
-     * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
-     * enough for such valid streams).
-     * Plus, a decoder has to check stream validity and remove frames if too
-     * many reference frames are around. Waiting for "OOM" is not correct at
-     * all. Similarly, missing reference frames have to be replaced by
-     * interpolated/MC frames, anything else is a bug in the codec ...
-     */
-    abort();
-    return -1;
+    return AVERROR_INVALIDDATA;
 }
 
 static void update_noise_reduction(MpegEncContext *s){
@@ -1167,6 +1153,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
         else{
             i= ff_find_unused_picture(s, 0);
+            if (i < 0)
+                return i;
             pic= &s->picture[i];
         }
 
@@ -1222,6 +1210,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
 
             /* Allocate a dummy frame */
             i= ff_find_unused_picture(s, 0);
+            if (i < 0)
+                return i;
             s->last_picture_ptr= &s->picture[i];
             s->last_picture_ptr->f.key_frame = 0;
             if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
@@ -1238,6 +1228,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         if ((s->next_picture_ptr == NULL || s->next_picture_ptr->f.data[0] == NULL) && s->pict_type == AV_PICTURE_TYPE_B) {
             /* Allocate a dummy frame */
             i= ff_find_unused_picture(s, 0);
+            if (i < 0)
+                return i;
             s->next_picture_ptr= &s->picture[i];
             s->next_picture_ptr->f.key_frame = 0;
             if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 40dea427d4..83c4932d5b 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -864,6 +864,8 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
 
     if(direct){
         i= ff_find_unused_picture(s, 1);
+        if (i < 0)
+            return i;
 
         pic= (AVFrame*)&s->picture[i];
         pic->reference= 3;
@@ -877,6 +879,8 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
         }
     }else{
         i= ff_find_unused_picture(s, 0);
+        if (i < 0)
+            return i;
 
         pic= (AVFrame*)&s->picture[i];
         pic->reference= 3;
@@ -1210,6 +1214,8 @@ no_output_pic:
             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
 
             int i= ff_find_unused_picture(s, 0);
+            if (i < 0)
+                return i;
             Picture *pic= &s->picture[i];
 
             pic->f.reference = s->reordered_input_picture[0]->f.reference;
diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h
index cf6e14d305..01352ea793 100644
--- a/libavcodec/rv34dsp.h
+++ b/libavcodec/rv34dsp.h
@@ -36,10 +36,18 @@ typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
 
 typedef void (*rv34_inv_transform_func)(DCTELEM *block);
 
-typedef void (*rv40_loop_filter_func)(uint8_t *src, int stride, int dmode,
-                                      int lim_q1, int lim_p1, int alpha,
-                                      int beta, int beta2, int chroma,
-                                      int edge);
+typedef void (*rv40_weak_loop_filter_func)(uint8_t *src, int stride,
+                                           int filter_p1, int filter_q1,
+                                           int alpha, int beta,
+                                           int lims, int lim_q1, int lim_p1);
+
+typedef void (*rv40_strong_loop_filter_func)(uint8_t *src, int stride,
+                                             int alpha, int lims,
+                                             int dmode, int chroma);
+
+typedef int (*rv40_loop_filter_strength_func)(uint8_t *src, int stride,
+                                              int beta, int beta2, int edge,
+                                              int *p1, int *q1);
 
 typedef struct RV34DSPContext {
     qpel_mc_func put_pixels_tab[4][16];
@@ -49,8 +57,9 @@ typedef struct RV34DSPContext {
     rv40_weight_func rv40_weight_pixels_tab[2];
     rv34_inv_transform_func rv34_inv_transform_tab[2];
     void (*rv34_dequant4x4)(DCTELEM *block, int Qdc, int Q);
-    rv40_loop_filter_func rv40_h_loop_filter;
-    rv40_loop_filter_func rv40_v_loop_filter;
+    rv40_weak_loop_filter_func rv40_weak_loop_filter[2];
+    rv40_strong_loop_filter_func rv40_strong_loop_filter[2];
+    rv40_loop_filter_strength_func rv40_loop_filter_strength[2];
 } RV34DSPContext;
 
 void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp);
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index 8dd78f1fb8..fe104c1908 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -297,6 +297,34 @@ enum RV40BlockPos{
 static const int neighbour_offs_x[4] = { 0,  0, -1, 0 };
 static const int neighbour_offs_y[4] = { 0, -1,  0, 1 };
 
+static void rv40_adaptive_loop_filter(RV34DSPContext *rdsp,
+                                      uint8_t *src, int stride, int dmode,
+                                      int lim_q1, int lim_p1,
+                                      int alpha, int beta, int beta2,
+                                      int chroma, int edge, int dir)
+{
+    int filter_p1, filter_q1;
+    int strong;
+    int lims;
+
+    strong = rdsp->rv40_loop_filter_strength[dir](src, stride, beta, beta2,
+                                                  edge, &filter_p1, &filter_q1);
+
+    lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1;
+
+    if (strong) {
+        rdsp->rv40_strong_loop_filter[dir](src, stride, alpha,
+                                           lims, dmode, chroma);
+    } else if (filter_p1 & filter_q1) {
+        rdsp->rv40_weak_loop_filter[dir](src, stride, 1, 1, alpha, beta,
+                                         lims, lim_q1, lim_p1);
+    } else if (filter_p1 | filter_q1) {
+        rdsp->rv40_weak_loop_filter[dir](src, stride, filter_p1, filter_q1,
+                                         alpha, beta, lims >> 1, lim_q1 >> 1,
+                                         lim_p1 >> 1);
+    }
+}
+
 /**
  * RV40 loop filtering function
  */
@@ -433,10 +461,11 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                 // if bottom block is coded then we can filter its top edge
                 // (or bottom edge of this block, which is the same)
                 if(y_h_deblock & (MASK_BOTTOM << ij)){
-                    r->rdsp.rv40_h_loop_filter(Y+4*s->linesize, s->linesize, dither,
-                                       y_to_deblock & (MASK_BOTTOM << ij) ? clip[POS_CUR] : 0,
-                                       clip_cur,
-                                       alpha, beta, betaY, 0, 0);
+                    rv40_adaptive_loop_filter(&r->rdsp, Y+4*s->linesize,
+                                              s->linesize, dither,
+                                              y_to_deblock & (MASK_BOTTOM << ij) ? clip[POS_CUR] : 0,
+                                              clip_cur, alpha, beta, betaY,
+                                              0, 0, 0);
                 }
                 // filter left block edge in ordinary mode (with low filtering strength)
                 if(y_v_deblock & (MASK_CUR << ij) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
@@ -444,25 +473,25 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                         clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
                     else
                         clip_left = y_to_deblock & (MASK_CUR << (ij-1)) ? clip[POS_CUR] : 0;
-                    r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither,
-                                       clip_cur,
-                                       clip_left,
-                                       alpha, beta, betaY, 0, 0);
+                    rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
+                                              clip_cur,
+                                              clip_left,
+                                              alpha, beta, betaY, 0, 0, 1);
                 }
                 // filter top edge of the current macroblock when filtering strength is high
                 if(!j && y_h_deblock & (MASK_CUR << i) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
-                    r->rdsp.rv40_h_loop_filter(Y, s->linesize, dither,
+                    rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
                                        clip_cur,
                                        mvmasks[POS_TOP] & (MASK_TOP << i) ? clip[POS_TOP] : 0,
-                                       alpha, beta, betaY, 0, 1);
+                                       alpha, beta, betaY, 0, 1, 0);
                 }
                 // filter left block edge in edge mode (with high filtering strength)
                 if(y_v_deblock & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
                     clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
-                    r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither,
+                    rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
                                        clip_cur,
                                        clip_left,
-                                       alpha, beta, betaY, 0, 1);
+                                       alpha, beta, betaY, 0, 1, 1);
                 }
             }
         }
@@ -474,34 +503,34 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                     int clip_cur = c_to_deblock[k] & (MASK_CUR << ij) ? clip[POS_CUR] : 0;
                     if(c_h_deblock[k] & (MASK_CUR << (ij+2))){
                         int clip_bot = c_to_deblock[k] & (MASK_CUR << (ij+2)) ? clip[POS_CUR] : 0;
-                        r->rdsp.rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, i*8,
+                        rv40_adaptive_loop_filter(&r->rdsp, C+4*s->uvlinesize, s->uvlinesize, i*8,
                                            clip_bot,
                                            clip_cur,
-                                           alpha, beta, betaC, 1, 0);
+                                           alpha, beta, betaC, 1, 0, 0);
                     }
                     if((c_v_deblock[k] & (MASK_CUR << ij)) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
                         if(!i)
                             clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
                         else
                             clip_left = c_to_deblock[k]    & (MASK_CUR << (ij-1))  ? clip[POS_CUR]  : 0;
-                        r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8,
+                        rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, j*8,
                                            clip_cur,
                                            clip_left,
-                                           alpha, beta, betaC, 1, 0);
+                                           alpha, beta, betaC, 1, 0, 1);
                     }
                     if(!j && c_h_deblock[k] & (MASK_CUR << ij) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
                         int clip_top = uvcbp[POS_TOP][k] & (MASK_CUR << (ij+2)) ? clip[POS_TOP] : 0;
-                        r->rdsp.rv40_h_loop_filter(C, s->uvlinesize, i*8,
+                        rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, i*8,
                                            clip_cur,
                                            clip_top,
-                                           alpha, beta, betaC, 1, 1);
+                                           alpha, beta, betaC, 1, 1, 0);
                     }
                     if(c_v_deblock[k] & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
                         clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
-                        r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8,
+                        rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, j*8,
                                            clip_cur,
                                            clip_left,
-                                           alpha, beta, betaC, 1, 1);
+                                           alpha, beta, betaC, 1, 1, 1);
                     }
                 }
             }
diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
index 1fc247e88b..913ced046d 100644
--- a/libavcodec/rv40dsp.c
+++ b/libavcodec/rv40dsp.c
@@ -314,142 +314,194 @@ static const uint8_t rv40_dither_r[16] = {
 /**
  * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
  */
-static inline void rv40_weak_loop_filter(uint8_t *src, const int step,
-                                         const int filter_p1, const int filter_q1,
-                                         const int alpha, const int beta,
-                                         const int lim_p0q0,
-                                         const int lim_q1, const int lim_p1,
-                                         const int diff_p1p0, const int diff_q1q0,
-                                         const int diff_p1p2, const int diff_q1q2)
+static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
+                                                   const int step,
+                                                   const int stride,
+                                                   const int filter_p1,
+                                                   const int filter_q1,
+                                                   const int alpha,
+                                                   const int beta,
+                                                   const int lim_p0q0,
+                                                   const int lim_q1,
+                                                   const int lim_p1)
 {
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-    int t, u, diff;
-
-    t = src[0*step] - src[-1*step];
-    if(!t)
-        return;
-    u = (alpha * FFABS(t)) >> 7;
-    if(u > 3 - (filter_p1 && filter_q1))
-        return;
-
-    t <<= 2;
-    if(filter_p1 && filter_q1)
-        t += src[-2*step] - src[1*step];
-    diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
-    src[-1*step] = cm[src[-1*step] + diff];
-    src[ 0*step] = cm[src[ 0*step] - diff];
-    if(FFABS(diff_p1p2) <= beta && filter_p1){
-        t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
-        src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
+    int i, t, u, diff;
+
+    for (i = 0; i < 4; i++, src += stride) {
+        int diff_p1p0 = src[-2*step] - src[-1*step];
+        int diff_q1q0 = src[ 1*step] - src[ 0*step];
+        int diff_p1p2 = src[-2*step] - src[-3*step];
+        int diff_q1q2 = src[ 1*step] - src[ 2*step];
+
+        t = src[0*step] - src[-1*step];
+        if (!t)
+            continue;
+
+        u = (alpha * FFABS(t)) >> 7;
+        if (u > 3 - (filter_p1 && filter_q1))
+            continue;
+
+        t <<= 2;
+        if (filter_p1 && filter_q1)
+            t += src[-2*step] - src[1*step];
+
+        diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
+        src[-1*step] = cm[src[-1*step] + diff];
+        src[ 0*step] = cm[src[ 0*step] - diff];
+
+        if (filter_p1 && FFABS(diff_p1p2) <= beta) {
+            t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
+            src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
+        }
+
+        if (filter_q1 && FFABS(diff_q1q2) <= beta) {
+            t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
+            src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
+        }
     }
-    if(FFABS(diff_q1q2) <= beta && filter_q1){
-        t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
-        src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
+}
+
+static void rv40_h_weak_loop_filter(uint8_t *src, const int stride,
+                                    const int filter_p1, const int filter_q1,
+                                    const int alpha, const int beta,
+                                    const int lim_p0q0, const int lim_q1,
+                                    const int lim_p1)
+{
+    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
+                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
+}
+
+static void rv40_v_weak_loop_filter(uint8_t *src, const int stride,
+                                    const int filter_p1, const int filter_q1,
+                                    const int alpha, const int beta,
+                                    const int lim_p0q0, const int lim_q1,
+                                    const int lim_p1)
+{
+    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
+                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
+}
+
+static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
+                                                     const int step,
+                                                     const int stride,
+                                                     const int alpha,
+                                                     const int lims,
+                                                     const int dmode,
+                                                     const int chroma)
+{
+    int i;
+
+    for(i = 0; i < 4; i++, src += stride){
+        int sflag, p0, q0, p1, q1;
+        int t = src[0*step] - src[-1*step];
+
+        if (!t)
+            continue;
+
+        sflag = (alpha * FFABS(t)) >> 7;
+        if (sflag > 1)
+            continue;
+
+        p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
+              26*src[ 0*step] + 25*src[ 1*step] +
+              rv40_dither_l[dmode + i]) >> 7;
+
+        q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
+              26*src[ 1*step] + 25*src[ 2*step] +
+              rv40_dither_r[dmode + i]) >> 7;
+
+        if (sflag) {
+            p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
+            q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
+        }
+
+        p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
+              25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
+        q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
+              25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
+
+        if (sflag) {
+            p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
+            q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
+        }
+
+        src[-2*step] = p1;
+        src[-1*step] = p0;
+        src[ 0*step] = q0;
+        src[ 1*step] = q1;
+
+        if(!chroma){
+            src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
+                            51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
+            src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
+                            51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
+        }
     }
 }
 
-static av_always_inline void rv40_adaptive_loop_filter(uint8_t *src, const int step,
-                                             const int stride, const int dmode,
-                                             const int lim_q1, const int lim_p1,
-                                             const int alpha,
-                                             const int beta, const int beta2,
-                                             const int chroma, const int edge)
+static void rv40_h_strong_loop_filter(uint8_t *src, const int stride,
+                                      const int alpha, const int lims,
+                                      const int dmode, const int chroma)
+{
+    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
+}
+
+static void rv40_v_strong_loop_filter(uint8_t *src, const int stride,
+                                      const int alpha, const int lims,
+                                      const int dmode, const int chroma)
+{
+    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
+}
+
+static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
+                                                      int step, int stride,
+                                                      int beta, int beta2,
+                                                      int edge,
+                                                      int *p1, int *q1)
 {
-    int diff_p1p0[4], diff_q1q0[4], diff_p1p2[4], diff_q1q2[4];
     int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
+    int strong0 = 0, strong1 = 0;
     uint8_t *ptr;
-    int flag_strong0 = 1, flag_strong1 = 1;
-    int filter_p1, filter_q1;
     int i;
-    int lims;
 
-    for(i = 0, ptr = src; i < 4; i++, ptr += stride){
-        diff_p1p0[i] = ptr[-2*step] - ptr[-1*step];
-        diff_q1q0[i] = ptr[ 1*step] - ptr[ 0*step];
-        sum_p1p0 += diff_p1p0[i];
-        sum_q1q0 += diff_q1q0[i];
-    }
-    filter_p1 = FFABS(sum_p1p0) < (beta<<2);
-    filter_q1 = FFABS(sum_q1q0) < (beta<<2);
-    if(!filter_p1 && !filter_q1)
-        return;
-
-    for(i = 0, ptr = src; i < 4; i++, ptr += stride){
-        diff_p1p2[i] = ptr[-2*step] - ptr[-3*step];
-        diff_q1q2[i] = ptr[ 1*step] - ptr[ 2*step];
-        sum_p1p2 += diff_p1p2[i];
-        sum_q1q2 += diff_q1q2[i];
+    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
+        sum_p1p0 += ptr[-2*step] - ptr[-1*step];
+        sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
     }
 
-    if(edge){
-        flag_strong0 = filter_p1 && (FFABS(sum_p1p2) < beta2);
-        flag_strong1 = filter_q1 && (FFABS(sum_q1q2) < beta2);
-    }else{
-        flag_strong0 = flag_strong1 = 0;
-    }
+    *p1 = FFABS(sum_p1p0) < (beta << 2);
+    *q1 = FFABS(sum_q1q0) < (beta << 2);
 
-    lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1;
-    if(flag_strong0 && flag_strong1){ /* strong filtering */
-        for(i = 0; i < 4; i++, src += stride){
-            int sflag, p0, q0, p1, q1;
-            int t = src[0*step] - src[-1*step];
-
-            if(!t) continue;
-            sflag = (alpha * FFABS(t)) >> 7;
-            if(sflag > 1) continue;
-
-            p0 = (25*src[-3*step] + 26*src[-2*step]
-                + 26*src[-1*step]
-                + 26*src[ 0*step] + 25*src[ 1*step] + rv40_dither_l[dmode + i]) >> 7;
-            q0 = (25*src[-2*step] + 26*src[-1*step]
-                + 26*src[ 0*step]
-                + 26*src[ 1*step] + 25*src[ 2*step] + rv40_dither_r[dmode + i]) >> 7;
-            if(sflag){
-                p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
-                q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
-            }
-            p1 = (25*src[-4*step] + 26*src[-3*step]
-                + 26*src[-2*step]
-                + 26*p0           + 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
-            q1 = (25*src[-1*step] + 26*q0
-                + 26*src[ 1*step]
-                + 26*src[ 2*step] + 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
-            if(sflag){
-                p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
-                q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
-            }
-            src[-2*step] = p1;
-            src[-1*step] = p0;
-            src[ 0*step] = q0;
-            src[ 1*step] = q1;
-            if(!chroma){
-                src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + 51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
-                src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
-            }
-        }
-    }else if(filter_p1 && filter_q1){
-        for(i = 0; i < 4; i++, src += stride)
-            rv40_weak_loop_filter(src, step, 1, 1, alpha, beta, lims, lim_q1, lim_p1,
-                                  diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]);
-    }else{
-        for(i = 0; i < 4; i++, src += stride)
-            rv40_weak_loop_filter(src, step, filter_p1, filter_q1,
-                                  alpha, beta, lims>>1, lim_q1>>1, lim_p1>>1,
-                                  diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]);
+    if(!*p1 && !*q1)
+        return 0;
+
+    if (!edge)
+        return 0;
+
+    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
+        sum_p1p2 += ptr[-2*step] - ptr[-3*step];
+        sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
     }
+
+    strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
+    strong1 = *q1 && (FFABS(sum_q1q2) < beta2);
+
+    return strong0 && strong1;
 }
 
-static void rv40_v_loop_filter(uint8_t *src, int stride, int dmode,
-                               int lim_q1, int lim_p1,
-                               int alpha, int beta, int beta2, int chroma, int edge){
-    rv40_adaptive_loop_filter(src, 1, stride, dmode, lim_q1, lim_p1,
-                              alpha, beta, beta2, chroma, edge);
+static int rv40_h_loop_filter_strength(uint8_t *src, int stride,
+                                       int beta, int beta2, int edge,
+                                       int *p1, int *q1)
+{
+    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
 }
-static void rv40_h_loop_filter(uint8_t *src, int stride, int dmode,
-                               int lim_q1, int lim_p1,
-                               int alpha, int beta, int beta2, int chroma, int edge){
-    rv40_adaptive_loop_filter(src, stride, 1, dmode, lim_q1, lim_p1,
-                              alpha, beta, beta2, chroma, edge);
+
+static int rv40_v_loop_filter_strength(uint8_t *src, int stride,
+                                       int beta, int beta2, int edge,
+                                       int *p1, int *q1)
+{
+    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
 }
 
 av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
@@ -529,8 +581,12 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
     c->rv40_weight_pixels_tab[0] = rv40_weight_func_16;
     c->rv40_weight_pixels_tab[1] = rv40_weight_func_8;
 
-    c->rv40_h_loop_filter = rv40_h_loop_filter;
-    c->rv40_v_loop_filter = rv40_v_loop_filter;
+    c->rv40_weak_loop_filter[0]     = rv40_h_weak_loop_filter;
+    c->rv40_weak_loop_filter[1]     = rv40_v_weak_loop_filter;
+    c->rv40_strong_loop_filter[0]   = rv40_h_strong_loop_filter;
+    c->rv40_strong_loop_filter[1]   = rv40_v_strong_loop_filter;
+    c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
+    c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
 
     if (HAVE_MMX)
         ff_rv40dsp_init_x86(c, dsp);
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 4dab137eb7..771574df78 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -5582,6 +5582,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
      * otherwise we cannot store anything in there. */
     if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
         int i = ff_find_unused_picture(s, 0);
+        if (i < 0)
+            goto err;
         s->current_picture_ptr = &s->picture[i];
     }
 
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 870d7e0a30..84d4dadfa1 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,7 +21,7 @@
 #define AVCODEC_VERSION_H
 
 #define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR 44
+#define LIBAVCODEC_VERSION_MINOR 45
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 4b85514676..48ff39b0a8 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -11,6 +11,9 @@ YASM-OBJS-$(CONFIG_FFT)                += x86/fft_mmx.o                 \
 
 YASM-OBJS-$(CONFIG_DWT)                += x86/dwt_yasm.o
 
+YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o           \
+                                          x86/h264_chromamc_10bit.o
+
 MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_mmx.o
 YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock.o            \
                                           x86/h264_deblock_10bit.o      \
@@ -59,8 +62,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER)         += x86/vp8dsp-init.o
 MMX-OBJS-$(HAVE_YASM)                  += x86/dsputil_yasm.o            \
                                           x86/deinterlace.o             \
                                           x86/fmtconvert.o              \
-                                          x86/h264_chromamc.o           \
-                                          x86/h264_chromamc_10bit.o     \
                                           x86/h264_qpel_10bit.o         \
                                           $(YASM-OBJS-yes)
 
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index a27c3b53d6..75bcae8110 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2548,7 +2548,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         }
 
 #if HAVE_YASM
-        if (!high_bit_depth) {
+        if (!high_bit_depth && CONFIG_H264CHROMA) {
         c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
         c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
         }
@@ -2652,13 +2652,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
 
 #if HAVE_YASM
-            if (!high_bit_depth) {
+            if (!high_bit_depth && CONFIG_H264CHROMA) {
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
             c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
             c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
             }
-            if (bit_depth == 10) {
+            if (bit_depth == 10 && CONFIG_H264CHROMA) {
                 c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_10_mmxext;
                 c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_10_mmxext;
                 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_10_mmxext;
@@ -2728,7 +2728,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
 
 #if HAVE_YASM
-            if (!high_bit_depth) {
+            if (!high_bit_depth && CONFIG_H264CHROMA) {
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
             }
@@ -2781,8 +2781,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 H264_QPEL_FUNCS_10(2, 0, sse2_cache64)
                 H264_QPEL_FUNCS_10(3, 0, sse2_cache64)
 
-                c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2;
-                c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2;
+                if (CONFIG_H264CHROMA) {
+                    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
+                    c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
+                }
             }
 #endif
         }
@@ -2808,7 +2810,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 H264_QPEL_FUNCS_10(2, 0, ssse3_cache64)
                 H264_QPEL_FUNCS_10(3, 0, ssse3_cache64)
             }
-            if (!high_bit_depth) {
+            if (!high_bit_depth && CONFIG_H264CHROMA) {
             c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
             c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
@@ -2909,8 +2911,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 H264_QPEL_FUNCS_10(2, 0, sse2)
                 H264_QPEL_FUNCS_10(3, 0, sse2)
 
-                c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
-                c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
+                if (CONFIG_H264CHROMA) {
+                    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
+                    c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
+                }
             }
             c->butterflies_float_interleave = ff_butterflies_float_interleave_avx;
         }
author	Michael Niedermayer <michaelni@gmx.at>	2011-12-14 23:58:10 +0100
committer	Michael Niedermayer <michaelni@gmx.at>	2011-12-14 23:58:10 +0100
commit	e462257242fc037c99206457d1316e1ff9e5306f (patch)
tree	045910517a8b587f7a016b1c46403e1d1021f4f2 /libavcodec
parent	a1be5bc79d7ac4c7c7ed79c4d72b4f1945ecb55c (diff)
parent	115a57302a7d6661426304bec3a5bc72d0edf4b0 (diff)
download	ffmpeg-e462257242fc037c99206457d1316e1ff9e5306f.tar.gz