diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-12-14 23:58:10 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-12-14 23:58:10 +0100 |
commit | e462257242fc037c99206457d1316e1ff9e5306f (patch) | |
tree | 045910517a8b587f7a016b1c46403e1d1021f4f2 /libavcodec | |
parent | a1be5bc79d7ac4c7c7ed79c4d72b4f1945ecb55c (diff) | |
parent | 115a57302a7d6661426304bec3a5bc72d0edf4b0 (diff) | |
download | ffmpeg-e462257242fc037c99206457d1316e1ff9e5306f.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (23 commits)
applehttp: Properly clean up if unable to probe a segment
applehttp: Avoid reading uninitialized memory
fate: Replace misleading "aac" in the name of an ADTS test with "adts".
fate: Drop pointless "-an" from pictor test command.
fate: split off image codec FATE tests into their own file
fate: split off WMA codec FATE tests into their own file
fate: split off lossless video and audio FATE tests into their own files
fate: split off qtrle codec FATE tests into their own file
fate: split off Ut Video codec FATE tests into their own file
fate: split off screen codec FATE tests into their own file
fate: split off Real Inc. codec FATE tests into their own file
fate: split off AC-3 codec FATE tests into their own file
mpegvideo: remove abort() in ff_find_unused_picture()
rv40: NEON optimised loop filter strength selection
rv40: rearrange loop filter functions
configure: cosmetics: sort some lists where appropriate
swscale_mmx: drop no longer required parameters from VSCALEX macros
swscale: Mark yuv2planeX_8_mmx as MMX2; it contains MMX2 instructions.
build: conditionally compile x86 H.264 chroma optimizations
v410 encoder and decoder
...
Conflicts:
Changelog
configure
doc/developer.texi
doc/general.texi
libavcodec/arm/asm.S
libavcodec/avcodec.h
libavcodec/v410dec.c
libavcodec/v410enc.c
libavcodec/version.h
libavcodec/x86/Makefile
libavcodec/x86/dsputil_mmx.c
libswscale/x86/swscale_mmx.c
tests/Makefile
tests/fate2.mak
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/arm/asm.S | 8 | ||||
-rw-r--r-- | libavcodec/arm/rv40dsp_init_neon.c | 10 | ||||
-rw-r--r-- | libavcodec/arm/rv40dsp_neon.S | 86 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 10 | ||||
-rw-r--r-- | libavcodec/h261dec.c | 2 | ||||
-rw-r--r-- | libavcodec/h263dec.c | 2 | ||||
-rw-r--r-- | libavcodec/mpegvideo.c | 22 | ||||
-rw-r--r-- | libavcodec/mpegvideo_enc.c | 6 | ||||
-rw-r--r-- | libavcodec/rv34dsp.h | 21 | ||||
-rw-r--r-- | libavcodec/rv40.c | 69 | ||||
-rw-r--r-- | libavcodec/rv40dsp.c | 296 | ||||
-rw-r--r-- | libavcodec/vc1dec.c | 2 | ||||
-rw-r--r-- | libavcodec/version.h | 2 | ||||
-rw-r--r-- | libavcodec/x86/Makefile | 5 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 22 |
15 files changed, 377 insertions, 186 deletions
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S index d711cb8f11..2daac59242 100644 --- a/libavcodec/arm/asm.S +++ b/libavcodec/arm/asm.S @@ -113,10 +113,10 @@ T add \rn, \rn, \rm T ldr \rt, [\rn] .endm -.macro ldr_dpren rt, rn, rm:vararg -A ldr \rt, [\rn, -\rm] -T sub \rt, \rn, \rm -T ldr \rt, [\rt] +.macro ldr_dpre rt, rn, rm:vararg +A ldr \rt, [\rn, -\rm]! +T sub \rn, \rn, \rm +T ldr \rt, [\rn] .endm .macro ldr_post rt, rn, rm:vararg diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c index 36d75e6fd8..59dddb6605 100644 --- a/libavcodec/arm/rv40dsp_init_neon.c +++ b/libavcodec/arm/rv40dsp_init_neon.c @@ -54,6 +54,13 @@ void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int); void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int); +int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, int stride, + int beta, int beta2, int edge, + int *p1, int *q1); +int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, int stride, + int beta, int beta2, int edge, + int *p1, int *q1); + void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp) { c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon; @@ -116,4 +123,7 @@ void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp) c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_neon; c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_neon; + + c->rv40_loop_filter_strength[0] = ff_rv40_h_loop_filter_strength_neon; + c->rv40_loop_filter_strength[1] = ff_rv40_v_loop_filter_strength_neon; } diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S index 07ba8428c1..a4313d89f9 100644 --- a/libavcodec/arm/rv40dsp_neon.S +++ b/libavcodec/arm/rv40dsp_neon.S @@ -722,3 +722,89 @@ function ff_rv40_weight_func_8_neon, export=1 bne 1b bx lr endfunc + +function ff_rv40_h_loop_filter_strength_neon, export=1 + pkhbt r2, r3, r2, lsl #18 + + ldr r3, [r0] + ldr_dpre r12, r0, r1 + teq r3, r12 + beq 1f + + sub r0, r0, r1, lsl #1 + + vld1.32 {d4[]}, [r0,:32], r1 @ -3 + vld1.32 {d0[]}, [r0,:32], r1 @ -2 + vld1.32 {d4[1]}, [r0,:32], r1 @ -1 + vld1.32 {d5[]}, [r0,:32], r1 @ 0 + vld1.32 {d1[]}, [r0,:32], r1 @ 1 + vld1.32 {d5[0]}, [r0,:32], r1 @ 2 + + vpaddl.u8 q8, q0 @ -2, -2, -2, -2, 1, 1, 1, 1 + vpaddl.u8 q9, q2 @ -3, -3, -1, -1, 2, 2, 0, 0 + vdup.32 d30, r2 @ beta2, beta << 2 + vpadd.u16 d16, d16, d17 @ -2, -2, 1, 1 + vpadd.u16 d18, d18, d19 @ -3, -1, 2, 0 + vabd.u16 d16, d18, d16 + vclt.u16 d16, d16, d30 + + ldrd r2, r3, [sp, #4] + vmovl.u16 q12, d16 + vtrn.16 d16, d17 + vshr.u32 q12, q12, #15 + ldr r0, [sp] + vst1.32 {d24[1]}, [r2,:32] + vst1.32 {d25[1]}, [r3,:32] + + cmp r0, #0 + it eq + bxeq lr + + vand d18, d16, d17 + vtrn.32 d18, d19 + vand d18, d18, d19 + vmov.u16 r0, d18[0] + bx lr +1: + ldrd r2, r3, [sp, #4] + mov r0, #0 + str r0, [r2] + str r0, [r3] + bx lr +endfunc + +function ff_rv40_v_loop_filter_strength_neon, export=1 + sub r0, r0, #3 + pkhbt r2, r3, r2, lsl #18 + + vld1.8 {d0}, [r0], r1 + vld1.8 {d1}, [r0], r1 + vld1.8 {d2}, [r0], r1 + vld1.8 {d3}, [r0], r1 + + vaddl.u8 q0, d0, d1 + vaddl.u8 q1, d2, d3 + vdup.32 q15, r2 + vadd.u16 q0, q0, q1 @ -3, -2, -1, 0, 1, 2 + vext.16 q1, q0, q0, #1 @ -2, -1, 0, 1, 2 + vabd.u16 q0, q1, q0 + vclt.u16 q0, q0, q15 + + ldrd r2, r3, [sp, #4] + vmovl.u16 q1, d0 + vext.16 d1, d0, d1, #3 + vshr.u32 q1, q1, #15 + ldr r0, [sp] + vst1.32 {d2[1]}, [r2,:32] + vst1.32 {d3[1]}, [r3,:32] + + cmp r0, #0 + it eq + bxeq lr + + vand d0, d0, d1 + vtrn.16 d0, d1 + vand d0, d0, d1 + vmov.u16 r0, d0[0] + bx lr +endfunc diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 4290f8b4be..5a0e0ed380 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -4629,16 +4629,8 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, unsigned int av_xiphlacing(unsigned char *s, unsigned int v); /** -<<<<<<< HEAD - * Logs a generic warning message about a missing feature. This function is - * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.) -||||||| merged common ancestors - * Logs a generic warning message about a missing feature. This function is - * intended to be used internally by Libav (libavcodec, libavformat, etc.) -======= * Log a generic warning message about a missing feature. This function is - * intended to be used internally by Libav (libavcodec, libavformat, etc.) ->>>>>>> qatar/master + * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.) * only, and would normally not be used by applications. * @param[in] avc a pointer to an arbitrary struct of which the first field is * a pointer to an AVClass struct diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c index 9b34690e17..ff3d05c514 100644 --- a/libavcodec/h261dec.c +++ b/libavcodec/h261dec.c @@ -572,6 +572,8 @@ retry: //we need to set current_picture_ptr before reading the header, otherwise we cannot store anyting im there if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) { int i= ff_find_unused_picture(s, 0); + if (i < 0) + return i; s->current_picture_ptr= &s->picture[i]; } diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index 9bad8e6c74..0d741d80c2 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -410,6 +410,8 @@ retry: * otherwise we cannot store anyting in there */ if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) { int i= ff_find_unused_picture(s, 0); + if (i < 0) + return i; s->current_picture_ptr= &s->picture[i]; } diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 2b792642c5..8924046dc4 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -1095,21 +1095,7 @@ int ff_find_unused_picture(MpegEncContext *s, int shared) } } - av_log(s->avctx, AV_LOG_FATAL, - "Internal error, picture buffer overflow\n"); - /* We could return -1, but the codec would crash trying to draw into a - * non-existing frame anyway. This is safer than waiting for a random crash. - * Also the return of this is never useful, an encoder must only allocate - * as much as allowed in the specification. This has no relationship to how - * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large - * enough for such valid streams). - * Plus, a decoder has to check stream validity and remove frames if too - * many reference frames are around. Waiting for "OOM" is not correct at - * all. Similarly, missing reference frames have to be replaced by - * interpolated/MC frames, anything else is a bug in the codec ... - */ - abort(); - return -1; + return AVERROR_INVALIDDATA; } static void update_noise_reduction(MpegEncContext *s){ @@ -1167,6 +1153,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header) else{ i= ff_find_unused_picture(s, 0); + if (i < 0) + return i; pic= &s->picture[i]; } @@ -1222,6 +1210,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) /* Allocate a dummy frame */ i= ff_find_unused_picture(s, 0); + if (i < 0) + return i; s->last_picture_ptr= &s->picture[i]; s->last_picture_ptr->f.key_frame = 0; if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0) @@ -1238,6 +1228,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) if ((s->next_picture_ptr == NULL || s->next_picture_ptr->f.data[0] == NULL) && s->pict_type == AV_PICTURE_TYPE_B) { /* Allocate a dummy frame */ i= ff_find_unused_picture(s, 0); + if (i < 0) + return i; s->next_picture_ptr= &s->picture[i]; s->next_picture_ptr->f.key_frame = 0; if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0) diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 40dea427d4..83c4932d5b 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -864,6 +864,8 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){ if(direct){ i= ff_find_unused_picture(s, 1); + if (i < 0) + return i; pic= (AVFrame*)&s->picture[i]; pic->reference= 3; @@ -877,6 +879,8 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){ } }else{ i= ff_find_unused_picture(s, 0); + if (i < 0) + return i; pic= (AVFrame*)&s->picture[i]; pic->reference= 3; @@ -1210,6 +1214,8 @@ no_output_pic: // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable int i= ff_find_unused_picture(s, 0); + if (i < 0) + return i; Picture *pic= &s->picture[i]; pic->f.reference = s->reordered_input_picture[0]->f.reference; diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h index cf6e14d305..01352ea793 100644 --- a/libavcodec/rv34dsp.h +++ b/libavcodec/rv34dsp.h @@ -36,10 +36,18 @@ typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/, typedef void (*rv34_inv_transform_func)(DCTELEM *block); -typedef void (*rv40_loop_filter_func)(uint8_t *src, int stride, int dmode, - int lim_q1, int lim_p1, int alpha, - int beta, int beta2, int chroma, - int edge); +typedef void (*rv40_weak_loop_filter_func)(uint8_t *src, int stride, + int filter_p1, int filter_q1, + int alpha, int beta, + int lims, int lim_q1, int lim_p1); + +typedef void (*rv40_strong_loop_filter_func)(uint8_t *src, int stride, + int alpha, int lims, + int dmode, int chroma); + +typedef int (*rv40_loop_filter_strength_func)(uint8_t *src, int stride, + int beta, int beta2, int edge, + int *p1, int *q1); typedef struct RV34DSPContext { qpel_mc_func put_pixels_tab[4][16]; @@ -49,8 +57,9 @@ typedef struct RV34DSPContext { rv40_weight_func rv40_weight_pixels_tab[2]; rv34_inv_transform_func rv34_inv_transform_tab[2]; void (*rv34_dequant4x4)(DCTELEM *block, int Qdc, int Q); - rv40_loop_filter_func rv40_h_loop_filter; - rv40_loop_filter_func rv40_v_loop_filter; + rv40_weak_loop_filter_func rv40_weak_loop_filter[2]; + rv40_strong_loop_filter_func rv40_strong_loop_filter[2]; + rv40_loop_filter_strength_func rv40_loop_filter_strength[2]; } RV34DSPContext; void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp); diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c index 8dd78f1fb8..fe104c1908 100644 --- a/libavcodec/rv40.c +++ b/libavcodec/rv40.c @@ -297,6 +297,34 @@ enum RV40BlockPos{ static const int neighbour_offs_x[4] = { 0, 0, -1, 0 }; static const int neighbour_offs_y[4] = { 0, -1, 0, 1 }; +static void rv40_adaptive_loop_filter(RV34DSPContext *rdsp, + uint8_t *src, int stride, int dmode, + int lim_q1, int lim_p1, + int alpha, int beta, int beta2, + int chroma, int edge, int dir) +{ + int filter_p1, filter_q1; + int strong; + int lims; + + strong = rdsp->rv40_loop_filter_strength[dir](src, stride, beta, beta2, + edge, &filter_p1, &filter_q1); + + lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1; + + if (strong) { + rdsp->rv40_strong_loop_filter[dir](src, stride, alpha, + lims, dmode, chroma); + } else if (filter_p1 & filter_q1) { + rdsp->rv40_weak_loop_filter[dir](src, stride, 1, 1, alpha, beta, + lims, lim_q1, lim_p1); + } else if (filter_p1 | filter_q1) { + rdsp->rv40_weak_loop_filter[dir](src, stride, filter_p1, filter_q1, + alpha, beta, lims >> 1, lim_q1 >> 1, + lim_p1 >> 1); + } +} + /** * RV40 loop filtering function */ @@ -433,10 +461,11 @@ static void rv40_loop_filter(RV34DecContext *r, int row) // if bottom block is coded then we can filter its top edge // (or bottom edge of this block, which is the same) if(y_h_deblock & (MASK_BOTTOM << ij)){ - r->rdsp.rv40_h_loop_filter(Y+4*s->linesize, s->linesize, dither, - y_to_deblock & (MASK_BOTTOM << ij) ? clip[POS_CUR] : 0, - clip_cur, - alpha, beta, betaY, 0, 0); + rv40_adaptive_loop_filter(&r->rdsp, Y+4*s->linesize, + s->linesize, dither, + y_to_deblock & (MASK_BOTTOM << ij) ? clip[POS_CUR] : 0, + clip_cur, alpha, beta, betaY, + 0, 0, 0); } // filter left block edge in ordinary mode (with low filtering strength) if(y_v_deblock & (MASK_CUR << ij) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){ @@ -444,25 +473,25 @@ static void rv40_loop_filter(RV34DecContext *r, int row) clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0; else clip_left = y_to_deblock & (MASK_CUR << (ij-1)) ? clip[POS_CUR] : 0; - r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither, - clip_cur, - clip_left, - alpha, beta, betaY, 0, 0); + rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither, + clip_cur, + clip_left, + alpha, beta, betaY, 0, 0, 1); } // filter top edge of the current macroblock when filtering strength is high if(!j && y_h_deblock & (MASK_CUR << i) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){ - r->rdsp.rv40_h_loop_filter(Y, s->linesize, dither, + rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither, clip_cur, mvmasks[POS_TOP] & (MASK_TOP << i) ? clip[POS_TOP] : 0, - alpha, beta, betaY, 0, 1); + alpha, beta, betaY, 0, 1, 0); } // filter left block edge in edge mode (with high filtering strength) if(y_v_deblock & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){ clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0; - r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither, + rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither, clip_cur, clip_left, - alpha, beta, betaY, 0, 1); + alpha, beta, betaY, 0, 1, 1); } } } @@ -474,34 +503,34 @@ static void rv40_loop_filter(RV34DecContext *r, int row) int clip_cur = c_to_deblock[k] & (MASK_CUR << ij) ? clip[POS_CUR] : 0; if(c_h_deblock[k] & (MASK_CUR << (ij+2))){ int clip_bot = c_to_deblock[k] & (MASK_CUR << (ij+2)) ? clip[POS_CUR] : 0; - r->rdsp.rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, i*8, + rv40_adaptive_loop_filter(&r->rdsp, C+4*s->uvlinesize, s->uvlinesize, i*8, clip_bot, clip_cur, - alpha, beta, betaC, 1, 0); + alpha, beta, betaC, 1, 0, 0); } if((c_v_deblock[k] & (MASK_CUR << ij)) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){ if(!i) clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0; else clip_left = c_to_deblock[k] & (MASK_CUR << (ij-1)) ? clip[POS_CUR] : 0; - r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8, + rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, j*8, clip_cur, clip_left, - alpha, beta, betaC, 1, 0); + alpha, beta, betaC, 1, 0, 1); } if(!j && c_h_deblock[k] & (MASK_CUR << ij) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){ int clip_top = uvcbp[POS_TOP][k] & (MASK_CUR << (ij+2)) ? clip[POS_TOP] : 0; - r->rdsp.rv40_h_loop_filter(C, s->uvlinesize, i*8, + rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, i*8, clip_cur, clip_top, - alpha, beta, betaC, 1, 1); + alpha, beta, betaC, 1, 1, 0); } if(c_v_deblock[k] & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){ clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0; - r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8, + rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, j*8, clip_cur, clip_left, - alpha, beta, betaC, 1, 1); + alpha, beta, betaC, 1, 1, 1); } } } diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c index 1fc247e88b..913ced046d 100644 --- a/libavcodec/rv40dsp.c +++ b/libavcodec/rv40dsp.c @@ -314,142 +314,194 @@ static const uint8_t rv40_dither_r[16] = { /** * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1 */ -static inline void rv40_weak_loop_filter(uint8_t *src, const int step, - const int filter_p1, const int filter_q1, - const int alpha, const int beta, - const int lim_p0q0, - const int lim_q1, const int lim_p1, - const int diff_p1p0, const int diff_q1q0, - const int diff_p1p2, const int diff_q1q2) +static av_always_inline void rv40_weak_loop_filter(uint8_t *src, + const int step, + const int stride, + const int filter_p1, + const int filter_q1, + const int alpha, + const int beta, + const int lim_p0q0, + const int lim_q1, + const int lim_p1) { uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - int t, u, diff; - - t = src[0*step] - src[-1*step]; - if(!t) - return; - u = (alpha * FFABS(t)) >> 7; - if(u > 3 - (filter_p1 && filter_q1)) - return; - - t <<= 2; - if(filter_p1 && filter_q1) - t += src[-2*step] - src[1*step]; - diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0); - src[-1*step] = cm[src[-1*step] + diff]; - src[ 0*step] = cm[src[ 0*step] - diff]; - if(FFABS(diff_p1p2) <= beta && filter_p1){ - t = (diff_p1p0 + diff_p1p2 - diff) >> 1; - src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)]; + int i, t, u, diff; + + for (i = 0; i < 4; i++, src += stride) { + int diff_p1p0 = src[-2*step] - src[-1*step]; + int diff_q1q0 = src[ 1*step] - src[ 0*step]; + int diff_p1p2 = src[-2*step] - src[-3*step]; + int diff_q1q2 = src[ 1*step] - src[ 2*step]; + + t = src[0*step] - src[-1*step]; + if (!t) + continue; + + u = (alpha * FFABS(t)) >> 7; + if (u > 3 - (filter_p1 && filter_q1)) + continue; + + t <<= 2; + if (filter_p1 && filter_q1) + t += src[-2*step] - src[1*step]; + + diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0); + src[-1*step] = cm[src[-1*step] + diff]; + src[ 0*step] = cm[src[ 0*step] - diff]; + + if (filter_p1 && FFABS(diff_p1p2) <= beta) { + t = (diff_p1p0 + diff_p1p2 - diff) >> 1; + src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)]; + } + + if (filter_q1 && FFABS(diff_q1q2) <= beta) { + t = (diff_q1q0 + diff_q1q2 + diff) >> 1; + src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)]; + } } - if(FFABS(diff_q1q2) <= beta && filter_q1){ - t = (diff_q1q0 + diff_q1q2 + diff) >> 1; - src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)]; +} + +static void rv40_h_weak_loop_filter(uint8_t *src, const int stride, + const int filter_p1, const int filter_q1, + const int alpha, const int beta, + const int lim_p0q0, const int lim_q1, + const int lim_p1) +{ + rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1, + alpha, beta, lim_p0q0, lim_q1, lim_p1); +} + +static void rv40_v_weak_loop_filter(uint8_t *src, const int stride, + const int filter_p1, const int filter_q1, + const int alpha, const int beta, + const int lim_p0q0, const int lim_q1, + const int lim_p1) +{ + rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1, + alpha, beta, lim_p0q0, lim_q1, lim_p1); +} + +static av_always_inline void rv40_strong_loop_filter(uint8_t *src, + const int step, + const int stride, + const int alpha, + const int lims, + const int dmode, + const int chroma) +{ + int i; + + for(i = 0; i < 4; i++, src += stride){ + int sflag, p0, q0, p1, q1; + int t = src[0*step] - src[-1*step]; + + if (!t) + continue; + + sflag = (alpha * FFABS(t)) >> 7; + if (sflag > 1) + continue; + + p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] + + 26*src[ 0*step] + 25*src[ 1*step] + + rv40_dither_l[dmode + i]) >> 7; + + q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] + + 26*src[ 1*step] + 25*src[ 2*step] + + rv40_dither_r[dmode + i]) >> 7; + + if (sflag) { + p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims); + q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims); + } + + p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 + + 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7; + q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] + + 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7; + + if (sflag) { + p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims); + q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims); + } + + src[-2*step] = p1; + src[-1*step] = p0; + src[ 0*step] = q0; + src[ 1*step] = q1; + + if(!chroma){ + src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + + 51*src[-3*step] + 26*src[-4*step] + 64) >> 7; + src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + + 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7; + } } } -static av_always_inline void rv40_adaptive_loop_filter(uint8_t *src, const int step, - const int stride, const int dmode, - const int lim_q1, const int lim_p1, - const int alpha, - const int beta, const int beta2, - const int chroma, const int edge) +static void rv40_h_strong_loop_filter(uint8_t *src, const int stride, + const int alpha, const int lims, + const int dmode, const int chroma) +{ + rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma); +} + +static void rv40_v_strong_loop_filter(uint8_t *src, const int stride, + const int alpha, const int lims, + const int dmode, const int chroma) +{ + rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma); +} + +static av_always_inline int rv40_loop_filter_strength(uint8_t *src, + int step, int stride, + int beta, int beta2, + int edge, + int *p1, int *q1) { - int diff_p1p0[4], diff_q1q0[4], diff_p1p2[4], diff_q1q2[4]; int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0; + int strong0 = 0, strong1 = 0; uint8_t *ptr; - int flag_strong0 = 1, flag_strong1 = 1; - int filter_p1, filter_q1; int i; - int lims; - for(i = 0, ptr = src; i < 4; i++, ptr += stride){ - diff_p1p0[i] = ptr[-2*step] - ptr[-1*step]; - diff_q1q0[i] = ptr[ 1*step] - ptr[ 0*step]; - sum_p1p0 += diff_p1p0[i]; - sum_q1q0 += diff_q1q0[i]; - } - filter_p1 = FFABS(sum_p1p0) < (beta<<2); - filter_q1 = FFABS(sum_q1q0) < (beta<<2); - if(!filter_p1 && !filter_q1) - return; - - for(i = 0, ptr = src; i < 4; i++, ptr += stride){ - diff_p1p2[i] = ptr[-2*step] - ptr[-3*step]; - diff_q1q2[i] = ptr[ 1*step] - ptr[ 2*step]; - sum_p1p2 += diff_p1p2[i]; - sum_q1q2 += diff_q1q2[i]; + for (i = 0, ptr = src; i < 4; i++, ptr += stride) { + sum_p1p0 += ptr[-2*step] - ptr[-1*step]; + sum_q1q0 += ptr[ 1*step] - ptr[ 0*step]; } - if(edge){ - flag_strong0 = filter_p1 && (FFABS(sum_p1p2) < beta2); - flag_strong1 = filter_q1 && (FFABS(sum_q1q2) < beta2); - }else{ - flag_strong0 = flag_strong1 = 0; - } + *p1 = FFABS(sum_p1p0) < (beta << 2); + *q1 = FFABS(sum_q1q0) < (beta << 2); - lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1; - if(flag_strong0 && flag_strong1){ /* strong filtering */ - for(i = 0; i < 4; i++, src += stride){ - int sflag, p0, q0, p1, q1; - int t = src[0*step] - src[-1*step]; - - if(!t) continue; - sflag = (alpha * FFABS(t)) >> 7; - if(sflag > 1) continue; - - p0 = (25*src[-3*step] + 26*src[-2*step] - + 26*src[-1*step] - + 26*src[ 0*step] + 25*src[ 1*step] + rv40_dither_l[dmode + i]) >> 7; - q0 = (25*src[-2*step] + 26*src[-1*step] - + 26*src[ 0*step] - + 26*src[ 1*step] + 25*src[ 2*step] + rv40_dither_r[dmode + i]) >> 7; - if(sflag){ - p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims); - q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims); - } - p1 = (25*src[-4*step] + 26*src[-3*step] - + 26*src[-2*step] - + 26*p0 + 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7; - q1 = (25*src[-1*step] + 26*q0 - + 26*src[ 1*step] - + 26*src[ 2*step] + 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7; - if(sflag){ - p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims); - q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims); - } - src[-2*step] = p1; - src[-1*step] = p0; - src[ 0*step] = q0; - src[ 1*step] = q1; - if(!chroma){ - src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + 51*src[-3*step] + 26*src[-4*step] + 64) >> 7; - src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7; - } - } - }else if(filter_p1 && filter_q1){ - for(i = 0; i < 4; i++, src += stride) - rv40_weak_loop_filter(src, step, 1, 1, alpha, beta, lims, lim_q1, lim_p1, - diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]); - }else{ - for(i = 0; i < 4; i++, src += stride) - rv40_weak_loop_filter(src, step, filter_p1, filter_q1, - alpha, beta, lims>>1, lim_q1>>1, lim_p1>>1, - diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]); + if(!*p1 && !*q1) + return 0; + + if (!edge) + return 0; + + for (i = 0, ptr = src; i < 4; i++, ptr += stride) { + sum_p1p2 += ptr[-2*step] - ptr[-3*step]; + sum_q1q2 += ptr[ 1*step] - ptr[ 2*step]; } + + strong0 = *p1 && (FFABS(sum_p1p2) < beta2); + strong1 = *q1 && (FFABS(sum_q1q2) < beta2); + + return strong0 && strong1; } -static void rv40_v_loop_filter(uint8_t *src, int stride, int dmode, - int lim_q1, int lim_p1, - int alpha, int beta, int beta2, int chroma, int edge){ - rv40_adaptive_loop_filter(src, 1, stride, dmode, lim_q1, lim_p1, - alpha, beta, beta2, chroma, edge); +static int rv40_h_loop_filter_strength(uint8_t *src, int stride, + int beta, int beta2, int edge, + int *p1, int *q1) +{ + return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1); } -static void rv40_h_loop_filter(uint8_t *src, int stride, int dmode, - int lim_q1, int lim_p1, - int alpha, int beta, int beta2, int chroma, int edge){ - rv40_adaptive_loop_filter(src, stride, 1, dmode, lim_q1, lim_p1, - alpha, beta, beta2, chroma, edge); + +static int rv40_v_loop_filter_strength(uint8_t *src, int stride, + int beta, int beta2, int edge, + int *p1, int *q1) +{ + return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1); } av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { @@ -529,8 +581,12 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { c->rv40_weight_pixels_tab[0] = rv40_weight_func_16; c->rv40_weight_pixels_tab[1] = rv40_weight_func_8; - c->rv40_h_loop_filter = rv40_h_loop_filter; - c->rv40_v_loop_filter = rv40_v_loop_filter; + c->rv40_weak_loop_filter[0] = rv40_h_weak_loop_filter; + c->rv40_weak_loop_filter[1] = rv40_v_weak_loop_filter; + c->rv40_strong_loop_filter[0] = rv40_h_strong_loop_filter; + c->rv40_strong_loop_filter[1] = rv40_v_strong_loop_filter; + c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength; + c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength; if (HAVE_MMX) ff_rv40dsp_init_x86(c, dsp); diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 4dab137eb7..771574df78 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -5582,6 +5582,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, * otherwise we cannot store anything in there. */ if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) { int i = ff_find_unused_picture(s, 0); + if (i < 0) + goto err; s->current_picture_ptr = &s->picture[i]; } diff --git a/libavcodec/version.h b/libavcodec/version.h index 870d7e0a30..84d4dadfa1 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -21,7 +21,7 @@ #define AVCODEC_VERSION_H #define LIBAVCODEC_VERSION_MAJOR 53 -#define LIBAVCODEC_VERSION_MINOR 44 +#define LIBAVCODEC_VERSION_MINOR 45 #define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 4b85514676..48ff39b0a8 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -11,6 +11,9 @@ YASM-OBJS-$(CONFIG_FFT) += x86/fft_mmx.o \ YASM-OBJS-$(CONFIG_DWT) += x86/dwt_yasm.o +YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \ + x86/h264_chromamc_10bit.o + MMX-OBJS-$(CONFIG_H264DSP) += x86/h264dsp_mmx.o YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ x86/h264_deblock_10bit.o \ @@ -59,8 +62,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \ x86/deinterlace.o \ x86/fmtconvert.o \ - x86/h264_chromamc.o \ - x86/h264_chromamc_10bit.o \ x86/h264_qpel_10bit.o \ $(YASM-OBJS-yes) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index a27c3b53d6..75bcae8110 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2548,7 +2548,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #if HAVE_YASM - if (!high_bit_depth) { + if (!high_bit_depth && CONFIG_H264CHROMA) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; } @@ -2652,13 +2652,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); #if HAVE_YASM - if (!high_bit_depth) { + if (!high_bit_depth && CONFIG_H264CHROMA) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2; } - if (bit_depth == 10) { + if (bit_depth == 10 && CONFIG_H264CHROMA) { c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_10_mmxext; c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_10_mmxext; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_10_mmxext; @@ -2728,7 +2728,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, ); #if HAVE_YASM - if (!high_bit_depth) { + if (!high_bit_depth && CONFIG_H264CHROMA) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; } @@ -2781,8 +2781,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS_10(2, 0, sse2_cache64) H264_QPEL_FUNCS_10(3, 0, sse2_cache64) - c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2; - c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2; + if (CONFIG_H264CHROMA) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; + } } #endif } @@ -2808,7 +2810,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS_10(2, 0, ssse3_cache64) H264_QPEL_FUNCS_10(3, 0, ssse3_cache64) } - if (!high_bit_depth) { + if (!high_bit_depth && CONFIG_H264CHROMA) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; @@ -2909,8 +2911,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS_10(2, 0, sse2) H264_QPEL_FUNCS_10(3, 0, sse2) - c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx; - c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx; + if (CONFIG_H264CHROMA) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; + } } c->butterflies_float_interleave = ff_butterflies_float_interleave_avx; } |