diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2004-02-13 17:54:10 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2004-02-13 17:54:10 +0000 |
commit | 9c3d33d67f2260d5ddc888e8ea380c3913e38a72 (patch) | |
tree | 46c82202d3f7c2964f7cace9e1131e97ada124bb | |
parent | 7984082a08cf2119da3433c245b72a91020b879d (diff) | |
download | ffmpeg-9c3d33d67f2260d5ddc888e8ea380c3913e38a72.tar.gz |
multithreaded/SMP motion estimation
multithreaded/SMP encoding for MPEG1/MPEG2/MPEG4/H263
all pthread-specific code is in pthread.c
to try it, run configure --enable-pthreads and ffmpeg ... -threads <num>
the internal thread API is a simple AVCodecContext.execute() callback, which executes a given function pointer with different arguments and returns only after all of them have finished; that way no mutexes or other thread mess are needed outside pthread.c
Originally committed as revision 2772 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rwxr-xr-x | configure | 10 | ||||
-rw-r--r-- | ffmpeg.c | 75 | ||||
-rw-r--r-- | libavcodec/Makefile | 4 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 33 | ||||
-rw-r--r-- | libavcodec/common.h | 22 | ||||
-rw-r--r-- | libavcodec/h263.c | 29 | ||||
-rw-r--r-- | libavcodec/mjpeg.c | 11 | ||||
-rw-r--r-- | libavcodec/motion_est.c | 21 | ||||
-rw-r--r-- | libavcodec/motion_est_template.c | 34 | ||||
-rw-r--r-- | libavcodec/mpeg12.c | 4 | ||||
-rw-r--r-- | libavcodec/mpegvideo.c | 798 | ||||
-rw-r--r-- | libavcodec/mpegvideo.h | 19 | ||||
-rw-r--r-- | libavcodec/pthread.c | 139 | ||||
-rw-r--r-- | libavcodec/utils.c | 12 | ||||
-rw-r--r-- | tests/ffmpeg.regression.ref | 8 | ||||
-rwxr-xr-x | tests/regression.sh | 24 | ||||
-rw-r--r-- | tests/rotozoom.regression.ref | 8 |
17 files changed, 900 insertions, 351 deletions
@@ -29,6 +29,7 @@ echo " --enable-amr_nb enable amr_nb float audio codec" echo " --enable-amr_nb-fixed use fixed point for amr-nb codec" echo " --enable-amr_wb enable amr_wb float audio codec" echo " --enable-sunmlib use Sun medialib [default=no]" +echo " --enable-pthreads use pthreads [default=no]" echo "" echo "Advanced options (experts only):" echo " --source-path=PATH path of source code [$source_path]" @@ -171,6 +172,7 @@ amr_nb="no" amr_wb="no" amr_nb_fixed="no" sunmlib="no" +pthreads="no" # OS specific targetos=`uname -s` @@ -415,6 +417,8 @@ for opt do ;; --enable-sunmlib) sunmlib="yes" ;; + --enable-pthreads) pthreads="yes" + ;; esac done @@ -919,6 +923,7 @@ echo "Imlib2 support $imlib2" echo "freetype support $freetype2" fi echo "Sun medialib support" $sunmlib +echo "pthreads support" $pthreads echo "AMR-NB float support" $amr_nb echo "AMR-NB fixed support" $amr_nb_fixed echo "AMR-WB float support" $amr_wb @@ -1033,6 +1038,11 @@ if test "$sunmlib" = "yes" ; then echo "#define HAVE_MLIB 1" >> $TMPH extralibs="$extralibs -lmlib" fi +if test "$pthreads" = "yes" ; then + echo "HAVE_PTHREADS=yes" >> config.mak + echo "#define HAVE_PTHREADS 1" >> $TMPH + extralibs="$extralibs -lpthread" +fi if test "$sdl" = "yes" ; then echo "CONFIG_SDL=yes" >> config.mak echo "SDL_LIBS=`sdl-config --libs`" >> config.mak @@ -117,6 +117,13 @@ static int ildct_cmp = FF_CMP_VSAD; static int mb_cmp = FF_CMP_SAD; static int sub_cmp = FF_CMP_SAD; static int cmp = FF_CMP_SAD; +static int pre_cmp = FF_CMP_SAD; +static int pre_me = 0; +static float lumi_mask = 0; +static float dark_mask = 0; +static float scplx_mask = 0; +static float tcplx_mask = 0; +static float p_mask = 0; static int use_4mv = 0; static int use_obmc = 0; static int use_aic = 0; @@ -185,6 +192,7 @@ static char *audio_device = NULL; static int using_stdin = 0; static int using_vhook = 0; static int verbose = 1; +static int thread_count= 1; #define DEFAULT_PASS_LOGFILENAME "ffmpeg2pass" @@ -1855,6 +1863,41 @@ static void 
opt_cmp(const char *arg) cmp = atoi(arg); } +static void opt_pre_cmp(const char *arg) +{ + pre_cmp = atoi(arg); +} + +static void opt_pre_me(const char *arg) +{ + pre_me = atoi(arg); +} + +static void opt_lumi_mask(const char *arg) +{ + lumi_mask = atof(arg); +} + +static void opt_dark_mask(const char *arg) +{ + dark_mask = atof(arg); +} + +static void opt_scplx_mask(const char *arg) +{ + scplx_mask = atof(arg); +} + +static void opt_tcplx_mask(const char *arg) +{ + tcplx_mask = atof(arg); +} + +static void opt_p_mask(const char *arg) +{ + p_mask = atof(arg); +} + static void opt_qscale(const char *arg) { video_qscale = atof(arg); @@ -1990,6 +2033,11 @@ static void opt_sc_threshold(const char *arg) sc_threshold= atoi(arg); } +static void opt_thread_count(const char *arg) +{ + thread_count= atoi(arg); +} + static void opt_audio_bitrate(const char *arg) { audio_bit_rate = atoi(arg) * 1000; @@ -2348,6 +2396,10 @@ static void opt_output_file(const char *filename) exit(1); } avcodec_get_context_defaults(&st->codec); +#ifdef HAVE_PTHREADS + if(thread_count>1) + avcodec_pthread_init(&st->codec, thread_count); +#endif video_enc = &st->codec; @@ -2398,6 +2450,13 @@ static void opt_output_file(const char *filename) video_enc->ildct_cmp = ildct_cmp; video_enc->me_sub_cmp = sub_cmp; video_enc->me_cmp = cmp; + video_enc->me_pre_cmp = pre_cmp; + video_enc->pre_me = pre_me; + video_enc->lumi_masking = lumi_mask; + video_enc->dark_masking = dark_mask; + video_enc->spatial_cplx_masking = scplx_mask; + video_enc->temporal_cplx_masking = tcplx_mask; + video_enc->p_masking = p_mask; video_enc->quantizer_noise_shaping= qns; if (use_umv) { @@ -2451,7 +2510,8 @@ static void opt_output_file(const char *filename) video_enc->qcompress = video_qcomp; video_enc->rc_eq = video_rc_eq; video_enc->debug = debug; - video_enc->debug_mv = debug_mv; + video_enc->debug_mv = debug_mv; + video_enc->thread_count = thread_count; p= video_rc_override_string; for(i=0; p; i++){ int start, end, q; @@ -2527,6 
+2587,10 @@ static void opt_output_file(const char *filename) exit(1); } avcodec_get_context_defaults(&st->codec); +#ifdef HAVE_PTHREADS + if(thread_count>1) + avcodec_pthread_init(&st->codec, thread_count); +#endif audio_enc = &st->codec; audio_enc->codec_type = CODEC_TYPE_AUDIO; @@ -2544,6 +2608,7 @@ static void opt_output_file(const char *filename) audio_enc->bit_rate = audio_bit_rate; audio_enc->sample_rate = audio_sample_rate; audio_enc->strict_std_compliance = strict; + audio_enc->thread_count = thread_count; /* For audio codecs other than AC3 we limit */ /* the number of coded channels to stereo */ if (audio_channels > 2 && codec_id != CODEC_ID_AC3) { @@ -2999,6 +3064,7 @@ const OptionDef options[] = { { "loop", OPT_BOOL | OPT_EXPERT, {(void*)&loop_input}, "loop (current only works with images)" }, { "v", HAS_ARG, {(void*)opt_verbose}, "control amount of logging", "verbose" }, { "target", HAS_ARG, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\" or \"dvd\")", "type" }, + { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, /* video options */ { "b", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate}, "set video bitrate (in kbit/s)", "bitrate" }, @@ -3049,6 +3115,13 @@ const OptionDef options[] = { { "ildctcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_ildct_cmp}, "ildct compare function", "cmp function" }, { "subcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_sub_cmp}, "subpel compare function", "cmp function" }, { "cmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_cmp}, "fullpel compare function", "cmp function" }, + { "precmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_pre_cmp}, "pre motion estimation compare function", "cmp function" }, + { "preme", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_pre_me}, "pre motion estimation", "" }, + { "lumi_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_lumi_mask}, "luminance masking", "" }, + { "dark_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, 
{(void*)opt_dark_mask}, "darkness masking", "" }, + { "scplx_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_scplx_mask}, "spatial complexity masking", "" }, + { "tcplx_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_tcplx_mask}, "teporal complexity masking", "" }, + { "p_mask", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_p_mask}, "inter masking", "" }, { "4mv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_4mv}, "use four motion vector by macroblock (MPEG4)" }, { "obmc", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_obmc}, "use overlapped block motion compensation (h263+)" }, { "part", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_part}, "use data partitioning (MPEG4)" }, diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 8bfaf7efe5..65cf5d51e4 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -33,6 +33,10 @@ CLEANAMR=cleanamrfloat endif endif +ifeq ($(HAVE_PTHREADS),yes) +OBJS+= pthread.o +endif + ifeq ($(AMR_WB),yes) OBJS+= amr.o amrwb_float/dec_acelp.o amrwb_float/dec_dtx.o amrwb_float/dec_gain.o \ amrwb_float/dec_if.o amrwb_float/dec_lpc.o amrwb_float/dec_main.o \ diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 8f3b9dc640..eea51304d4 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -17,7 +17,7 @@ extern "C" { #define FFMPEG_VERSION_INT 0x000408 #define FFMPEG_VERSION "0.4.8" -#define LIBAVCODEC_BUILD 4701 +#define LIBAVCODEC_BUILD 4702 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT #define LIBAVCODEC_VERSION FFMPEG_VERSION @@ -1508,6 +1508,32 @@ typedef struct AVCodecContext { * - decoding: unused */ int quantizer_noise_shaping; + + /** + * Thread count. + * is used to decide how many independant tasks should be passed to execute() + * - encoding: set by user + * - decoding: set by user + */ + int thread_count; + + /** + * the codec may call this to execute several independant things. 
it will return only after + * finishing all tasks, the user may replace this with some multithreaded implementation, the + * default implementation will execute the parts serially + * @param count the number of functions this will be identical to thread_count if possible + * - encoding: set by lavc, user can override + * - decoding: set by lavc, user can override + */ + int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void **arg2, int *ret, int count); + + /** + * Thread opaque. + * can be used by execute() to store some per AVCodecContext stuff. + * - encoding: set by execute() + * - decoding: set by execute() + */ + void *thread_opaque; } AVCodecContext; @@ -1846,6 +1872,11 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic); void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic); void avcodec_default_free_buffers(AVCodecContext *s); +int avcodec_pthread_init(AVCodecContext *s, int thread_count); +void avcodec_pthread_free(AVCodecContext *s); +int avcodec_pthread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count); +//FIXME func typedef + /** * opens / inits the AVCodecContext. * not thread save! diff --git a/libavcodec/common.h b/libavcodec/common.h index 729ba4615b..c48f5f1a54 100644 --- a/libavcodec/common.h +++ b/libavcodec/common.h @@ -475,6 +475,28 @@ static inline uint8_t* pbBufPtr(PutBitContext *s) #endif } +/** + * + * PutBitContext must be flushed & aligned to a byte boundary before calling this. + */ +static inline void skip_put_bytes(PutBitContext *s, int n){ + assert((put_bits_count(s)&7)==0); +#ifdef ALT_BITSTREAM_WRITER + FIXME may need some cleaning of the buffer + s->index += n<<3; +#else + assert(s->bit_left==32); + s->buf_ptr += n; +#endif +} + +/** + * Changes the end of the buffer. 
+ */ +static inline void set_put_bits_buffer_size(PutBitContext *s, int size){ + s->buf_end= s->buf + size; +} + /* Bitstream reader API docs: name abritary name which is used as prefix for the internal variables diff --git a/libavcodec/h263.c b/libavcodec/h263.c index b02bace0c0..313214cd45 100644 --- a/libavcodec/h263.c +++ b/libavcodec/h263.c @@ -1961,7 +1961,9 @@ void h263_encode_init(MpegEncContext *s) s->luma_dc_vlc_length= uni_DCtab_lum_len; s->chroma_dc_vlc_length= uni_DCtab_chrom_len; s->ac_esc_length= 7+2+1+6+1+12+1; - + s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; + s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; + if(s->flags & CODEC_FLAG_GLOBAL_HEADER){ s->avctx->extradata= av_malloc(1024); @@ -2290,12 +2292,11 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1); put_bits(&s->pb, 1, 1); /* obmc disable */ if (vo_ver_id == 1) { - put_bits(&s->pb, 1, s->vol_sprite_usage=0); /* sprite enable */ + put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */ }else{ - put_bits(&s->pb, 2, s->vol_sprite_usage=0); /* sprite enable */ + put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */ } - s->quant_precision=5; put_bits(&s->pb, 1, 0); /* not 8 bit == false */ put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/ @@ -2384,9 +2385,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) if (s->pict_type == B_TYPE) put_bits(&s->pb, 3, s->b_code); /* fcode_back */ // printf("****frame %d\n", picture_number); - - s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support - s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; } #endif //CONFIG_ENCODERS @@ -2965,8 +2963,16 @@ static inline void memsetw(short *tab, int val, int n) void ff_mpeg4_init_partitions(MpegEncContext *s) { - init_put_bits(&s->tex_pb, s->tex_pb_buffer, PB_BUFFER_SIZE); - init_put_bits(&s->pb2 , s->pb2_buffer , PB_BUFFER_SIZE); + uint8_t *start= 
pbBufPtr(&s->pb); + uint8_t *end= s->pb.buf_end; + int size= end - start; + int pb_size = size/3; + int pb2_size= size/3; + int tex_size= size - pb_size - pb2_size; + + set_put_bits_buffer_size(&s->pb, pb_size); + init_put_bits(&s->tex_pb, start + pb_size , tex_size); + init_put_bits(&s->pb2 , start + pb_size + tex_size, pb2_size); } void ff_mpeg4_merge_partitions(MpegEncContext *s) @@ -2989,8 +2995,9 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s) flush_put_bits(&s->pb2); flush_put_bits(&s->tex_pb); - ff_copy_bits(&s->pb, s->pb2_buffer , pb2_len); - ff_copy_bits(&s->pb, s->tex_pb_buffer, tex_pb_len); + set_put_bits_buffer_size(&s->pb, s->pb2.buf_end - s->pb.buf); + ff_copy_bits(&s->pb, s->pb2.buf , pb2_len); + ff_copy_bits(&s->pb, s->tex_pb.buf, tex_pb_len); s->last_bits= put_bits_count(&s->pb); } diff --git a/libavcodec/mjpeg.c b/libavcodec/mjpeg.c index 7aed3e28ac..30029d40c0 100644 --- a/libavcodec/mjpeg.c +++ b/libavcodec/mjpeg.c @@ -531,11 +531,16 @@ static void escape_FF(MpegEncContext *s, int start) } } +void ff_mjpeg_stuffing(PutBitContext * pbc) +{ + int length; + length= (-put_bits_count(pbc))&7; + if(length) put_bits(pbc, length, (1<<length)-1); +} + void mjpeg_picture_trailer(MpegEncContext *s) { - int pad= (-put_bits_count(&s->pb))&7; - - put_bits(&s->pb, pad,0xFF>>(8-pad)); + ff_mjpeg_stuffing(&s->pb); flush_put_bits(&s->pb); assert((s->header_bits&7)==0); diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c index e8641790b2..5132487cf3 100644 --- a/libavcodec/motion_est.c +++ b/libavcodec/motion_est.c @@ -805,7 +805,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); /* special case for first line */ - if (s->mb_y == 0 && block<2) { + if (s->first_slice_line && block<2) { pred_x4= P_LEFT[0]; pred_y4= P_LEFT[1]; } else { @@ -845,13 +845,12 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) int dxy; const 
int offset= ((block&1) + (block>>1)*stride)*8; uint8_t *dest_y = s->me.scratchpad + offset; - if(s->quarter_sample){ uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride; dxy = ((my4 & 3) << 2) | (mx4 & 3); if(s->no_rounding) - s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize); + s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride); else s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); }else{ @@ -966,7 +965,7 @@ static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint pred_x= P_LEFT[0]; pred_y= P_LEFT[1]; - if(s->mb_y){ + if(!s->first_slice_line){ P_TOP[0] = mv_table[xy - mot_stride][0]; P_TOP[1] = mv_table[xy - mot_stride][1]; P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0]; @@ -1115,7 +1114,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); - if(mb_y) { + if(!s->first_slice_line) { P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0]; P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0]; @@ -1164,8 +1163,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard; pic->mb_mean [s->mb_stride * mb_y + mb_x] = (sum+128)>>8; // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; - pic->mb_var_sum += varc; - pic->mc_mb_var_sum += vard; + s->mb_var_sum_temp += varc; + s->mc_mb_var_sum_temp += vard; //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); #if 0 @@ -1326,7 +1325,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s, if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift); /* special case for first line */ - if (mb_y == s->mb_height-1) { + if (s->first_slice_line) { pred_x= P_LEFT[0]; pred_y= P_LEFT[1]; P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]= @@ -1409,7 +1408,7 @@ static int 
ff_estimate_motion_b(MpegEncContext * s, if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); /* special case for first line */ - if (mb_y) { + if (!s->first_slice_line) { P_TOP[0] = mv_table[mot_xy - mot_stride ][0]; P_TOP[1] = mv_table[mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0]; @@ -1610,7 +1609,7 @@ static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_ P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift); /* special case for first line */ - if (mb_y) { + if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as its cliped P_TOP[0] = clip(mv_table[mot_xy - mot_stride ][0], xmin<<shift, xmax<<shift); P_TOP[1] = clip(mv_table[mot_xy - mot_stride ][1], ymin<<shift, ymax<<shift); P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1 ][0], xmin<<shift, xmax<<shift); @@ -1727,7 +1726,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, } score= ((unsigned)(score*score + 128*256))>>16; - s->current_picture.mc_mb_var_sum += score; + s->mc_mb_var_sum_temp += score; s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE } diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c index 18203ec065..65fb3ae606 100644 --- a/libavcodec/motion_est_template.c +++ b/libavcodec/motion_est_template.c @@ -557,9 +557,11 @@ static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre #define CHECK_CLIPED_MV(ax,ay)\ {\ - const int x= FFMAX(xmin, FFMIN(ax, xmax));\ - const int y= FFMAX(ymin, FFMIN(ay, ymax));\ - CHECK_MV(x, y)\ + const int x= ax;\ + const int y= ay;\ + const int x2= FFMAX(xmin, FFMIN(x, xmax));\ + const int y2= FFMAX(ymin, FFMIN(y, ymax));\ + CHECK_MV(x2, y2)\ } #define CHECK_MV_DIR(x,y,new_dir)\ @@ -912,7 +914,7 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, score_map[0]= dmin; /* first line */ - if (s->mb_y == 0) { + if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, 
P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) @@ -938,13 +940,15 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, if(s->me.pre_pass){ CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + if(!s->first_slice_line) + CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) }else{ CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + if(s->end_mb_y == s->mb_height || s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) } } @@ -1024,7 +1028,7 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, dmin = 1000000; //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); /* first line */ - if (s->mb_y == 0/* && block<2*/) { + if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) @@ -1044,8 +1048,9 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, if(dmin>64*4){ CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + 
(1<<15))>>16) + if(s->end_mb_y == s->mb_height || s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) } if(s->me.dia_size==-1) @@ -1102,7 +1107,7 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s, dmin = 1000000; //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); /* first line */ - if (s->mb_y == 0) { + if (s->first_slice_line) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) @@ -1122,8 +1127,9 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s, if(dmin>64*4){ CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) - CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, - (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + if(s->end_mb_y == s->mb_height || s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) } if(s->me.dia_size==-1) diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index a66ea134ca..0e47048d44 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -419,7 +419,9 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) put_bits(&s->pb, 8, 255); } put_bits(&s->pb, 2, s->intra_dc_precision); - put_bits(&s->pb, 2, s->picture_structure= PICT_FRAME); + + assert(s->picture_structure == PICT_FRAME); + put_bits(&s->pb, 2, s->picture_structure); if (s->progressive_sequence) { put_bits(&s->pb, 1, 0); /* no repeat */ } else { diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 41347a98b0..b65b65f3e1 100644 --- a/libavcodec/mpegvideo.c +++ 
b/libavcodec/mpegvideo.c @@ -401,6 +401,98 @@ static void free_picture(MpegEncContext *s, Picture *pic){ } } +static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){ + int i; + + CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance + s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17; + + //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer() + CHECKED_ALLOCZ(s->me.scratchpad, s->width*2*16*2*sizeof(uint8_t)) + s->rd_scratchpad= s->me.scratchpad; + s->b_scratchpad= s->me.scratchpad; + s->obmc_scratchpad= s->me.scratchpad + 16; + if (s->encoding) { + CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t)) + CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t)) + if(s->avctx->noise_reduction){ + CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int)) + } + } + CHECKED_ALLOCZ(s->blocks, 64*6*2 * sizeof(DCTELEM)) + s->block= s->blocks[0]; + + for(i=0;i<12;i++){ + s->pblocks[i] = (short *)(&s->block[i]); + } + return 0; +fail: + return -1; //free() through MPV_common_end() +} + +static void free_duplicate_context(MpegEncContext *s){ + if(s==NULL) return; + + av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL; + av_freep(&s->me.scratchpad); + s->rd_scratchpad= + s->b_scratchpad= + s->obmc_scratchpad= NULL; + + av_freep(&s->dct_error_sum); + av_freep(&s->me.map); + av_freep(&s->me.score_map); + av_freep(&s->blocks); + s->block= NULL; +} + +static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){ +#define COPY(a) bak->a= src->a + COPY(allocated_edge_emu_buffer); + COPY(edge_emu_buffer); + COPY(me.scratchpad); + COPY(rd_scratchpad); + COPY(b_scratchpad); + COPY(obmc_scratchpad); + COPY(me.map); + COPY(me.score_map); + COPY(blocks); + COPY(block); + COPY(start_mb_y); + COPY(end_mb_y); + COPY(me.map_generation); + COPY(pb); + COPY(dct_error_sum); +#undef COPY +} + +static void 
update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){ + MpegEncContext bak; + //FIXME copy only needed parts +//START_TIMER + backup_duplicate_context(&bak, dst); + memcpy(dst, src, sizeof(MpegEncContext)); + backup_duplicate_context(dst, &bak); +//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads +} + +static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){ +#define COPY(a) dst->a= src->a + COPY(pict_type); + COPY(current_picture); + COPY(f_code); + COPY(b_code); + COPY(qscale); + COPY(lambda); + COPY(lambda2); + COPY(picture_in_gop_number); + COPY(gop_picture_number); + COPY(frame_pred_frame_dct); //FIXME dont set in encode_header + COPY(progressive_frame); //FIXME dont set in encode_header + COPY(partitioned_frame); //FIXME dont set in encode_header +#undef COPY +} + /* init common structure for both encoder and decoder */ int MPV_common_init(MpegEncContext *s) { @@ -456,9 +548,6 @@ int MPV_common_init(MpegEncContext *s) + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24); - CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance - s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17; - s->avctx->coded_frame= (AVFrame*)&s->current_picture; CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this @@ -484,17 +573,6 @@ int MPV_common_init(MpegEncContext *s) s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1; s->b_direct_mv_table = s->b_direct_mv_table_base + s->mb_stride + 1; - //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer() - CHECKED_ALLOCZ(s->me.scratchpad, s->width*2*16*3*sizeof(uint8_t)) - - CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t)) - CHECKED_ALLOCZ(s->me.score_map, 
ME_MAP_SIZE*sizeof(uint32_t)) - - if(s->codec_id==CODEC_ID_MPEG4){ - CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE); - CHECKED_ALLOCZ( s->pb2_buffer, PB_BUFFER_SIZE); - } - if(s->msmpeg4_version){ CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int)); } @@ -513,12 +591,9 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*)) if(s->avctx->noise_reduction){ - CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int)) CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t)) } } - CHECKED_ALLOCZ(s->blocks, 64*6*2 * sizeof(DCTELEM)) - CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture)) CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t)) @@ -578,12 +653,6 @@ int MPV_common_init(MpegEncContext *s) //Note the +1 is for a quicker mpeg4 slice_end detection CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE); - s->block= s->blocks[0]; - - for(i=0;i<12;i++){ - s->pblocks[i] = (short *)(&s->block[i]); - } - s->parse_context.state= -1; if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){ s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH); @@ -592,20 +661,38 @@ int MPV_common_init(MpegEncContext *s) } s->context_initialized = 1; + + s->thread_context[0]= s; + for(i=1; i<s->avctx->thread_count; i++){ + s->thread_context[i]= av_malloc(sizeof(MpegEncContext)); + memcpy(s->thread_context[i], s, sizeof(MpegEncContext)); + } + + for(i=0; i<s->avctx->thread_count; i++){ + if(init_duplicate_context(s->thread_context[i], s) < 0) + goto fail; + s->thread_context[i]->start_mb_y= (s->mb_height*(i ) + s->avctx->thread_count/2) / s->avctx->thread_count; + s->thread_context[i]->end_mb_y = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count; + } + return 0; fail: MPV_common_end(s); return -1; } - -//extern int sads; - /* init common structure for both encoder and 
decoder */ void MPV_common_end(MpegEncContext *s) { int i, j, k; + for(i=0; i<s->avctx->thread_count; i++){ + free_duplicate_context(s->thread_context[i]); + } + for(i=1; i<s->avctx->thread_count; i++){ + av_freep(&s->thread_context[i]); + } + av_freep(&s->parse_context.buffer); s->parse_context.buffer_size=0; @@ -641,16 +728,10 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->mbintra_table); av_freep(&s->cbp_table); av_freep(&s->pred_dir_table); - av_freep(&s->me.scratchpad); - av_freep(&s->me.map); - av_freep(&s->me.score_map); av_freep(&s->mbskip_table); av_freep(&s->prev_pict_types); av_freep(&s->bitstream_buffer); - av_freep(&s->tex_pb_buffer); - av_freep(&s->pb2_buffer); - av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL; av_freep(&s->avctx->stats_out); av_freep(&s->ac_stats); av_freep(&s->error_status_table); @@ -660,7 +741,6 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->q_inter_matrix); av_freep(&s->q_intra_matrix16); av_freep(&s->q_inter_matrix16); - av_freep(&s->blocks); av_freep(&s->input_picture); av_freep(&s->reordered_input_picture); av_freep(&s->dct_error_sum); @@ -797,6 +877,16 @@ int MPV_encode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n"); return -1; } + + if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 + && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO + && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){ + av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n"); + return -1; + } + + if(s->avctx->thread_count > 1) + s->rtp_mode= 1; i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base); if(i > 1){ @@ -990,6 +1080,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->chroma_qscale_table= ff_h263_chroma_qscale_table; s->progressive_frame= s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME)); + 
s->quant_precision=5; ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp); @@ -1903,7 +1994,14 @@ int MPV_encode_picture(AVCodecContext *avctx, return -1; } - init_put_bits(&s->pb, buf, buf_size); + for(i=0; i<avctx->thread_count; i++){ + int y= s->thread_context[i]->start_mb_y; + int h= s->mb_height; + uint8_t *start= buf + buf_size* y /h; + uint8_t *end = buf + buf_size*(y+1)/h; + + init_put_bits(&s->thread_context[i]->pb, start, end - start); + } s->picture_in_gop_number++; @@ -2400,7 +2498,7 @@ static inline void obmc_motion(MpegEncContext *s, if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){ ptr[i]= ptr[MID]; }else{ - ptr[i]= s->edge_emu_buffer + 16 + 8*(i&1) + s->linesize*8*(i>>1); + ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1); hpel_motion(s, ptr[i], src, src_x, src_y, s->width, s->height, s->linesize, @@ -3026,9 +3124,9 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) dest_cb= s->dest[1]; dest_cr= s->dest[2]; }else{ - dest_y = s->edge_emu_buffer+32; //FIXME cleanup scratchpad pointers - dest_cb= s->edge_emu_buffer+48; - dest_cr= s->edge_emu_buffer+56; + dest_y = s->b_scratchpad; + dest_cb= s->b_scratchpad+16*s->linesize; + dest_cr= s->b_scratchpad+16*s->linesize+8; } if (!s->mb_intra) { /* motion handling */ @@ -3634,14 +3732,21 @@ void ff_mpeg_flush(AVCodecContext *avctx){ #ifdef CONFIG_ENCODERS void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length) { - int bytes= length>>4; + int words= length>>4; int bits= length&15; int i; if(length==0) return; - - for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i])); - put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits)); + +// if(put_bits_count(pb)&7){ //FIXME + for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i])); +/* }else{ + flush_put_bits(pb); + memcpy(pbBufPtr(pb), src, 2*words); + skip_put_bytes(pb, 2*words); + }*/ + + put_bits(pb, bits, be2me_16(((uint16_t*)src)[words])>>(16-bits)); } static inline void 
copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){ @@ -3725,11 +3830,10 @@ static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegE if(*next_block){ memcpy(dest_backup, s->dest, sizeof(s->dest)); - s->dest[0] = s->me.scratchpad; - s->dest[1] = s->me.scratchpad + 16; - s->dest[2] = s->me.scratchpad + 16 + 8; - assert(2*s->uvlinesize == s->linesize); //should be no prob for encoding - assert(s->linesize >= 64); //FIXME + s->dest[0] = s->rd_scratchpad; + s->dest[1] = s->rd_scratchpad + 16*s->linesize; + s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8; + assert(s->linesize >= 32); //FIXME } encode_mb(s, motion_x, motion_y); @@ -3797,253 +3901,83 @@ static int sse_mb(MpegEncContext *s){ +sse(s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize); } -static void encode_picture(MpegEncContext *s, int picture_number) -{ - int mb_x, mb_y, pdif = 0; - int i, j; - int bits; - MpegEncContext best_s, backup_s; - uint8_t bit_buf[2][3000]; - uint8_t bit_buf2[2][3000]; - uint8_t bit_buf_tex[2][3000]; - PutBitContext pb[2], pb2[2], tex_pb[2]; - - for(i=0; i<2; i++){ - init_put_bits(&pb [i], bit_buf [i], 3000); - init_put_bits(&pb2 [i], bit_buf2 [i], 3000); - init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000); - } - - s->picture_number = picture_number; - - /* Reset the average MB variance */ - s->current_picture.mb_var_sum = 0; - s->current_picture.mc_mb_var_sum = 0; +static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; -#ifdef CONFIG_RISKY - /* we need to initialize some time vars before we can encode b-frames */ - // RAL: Condition added for MPEG1VIDEO - if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4)) - ff_set_mpeg4_time(s, s->picture_number); -#endif - - s->scene_change_score=0; - - s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... 
stuff for ME ratedistoration - if(s->pict_type==I_TYPE){ - if(s->msmpeg4_version >= 3) s->no_rounding=1; - else s->no_rounding=0; - }else if(s->pict_type!=B_TYPE){ - if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4) - s->no_rounding ^= 1; + s->me.pre_pass=1; + s->me.dia_size= s->avctx->pre_dia_size; + s->first_slice_line=1; + for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) { + for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) { + ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y); + } + s->first_slice_line=0; } - /* Estimate motion for every MB */ - s->mb_intra=0; //for the rate distoration & bit compare functions - if(s->pict_type != I_TYPE){ - if(s->pict_type != B_TYPE){ - if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){ - s->me.pre_pass=1; - s->me.dia_size= s->avctx->pre_dia_size; - - for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) { - s->mb_y = mb_y; - for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) { - s->mb_x = mb_x; - ff_pre_estimate_p_frame_motion(s, mb_x, mb_y); - } - } - s->me.pre_pass=0; - } - } - - s->me.dia_size= s->avctx->dia_size; - for(mb_y=0; mb_y < s->mb_height; mb_y++) { - s->mb_y = mb_y; - s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1; - s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1); - s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1; - s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2); - for(mb_x=0; mb_x < s->mb_width; mb_x++) { - s->mb_x = mb_x; - s->block_index[0]+=2; - s->block_index[1]+=2; - s->block_index[2]+=2; - s->block_index[3]+=2; - - /* compute motion vector & mb_type and store in context */ - if(s->pict_type==B_TYPE) - ff_estimate_b_frame_motion(s, mb_x, mb_y); - else - ff_estimate_p_frame_motion(s, mb_x, mb_y); - } - } - }else /* if(s->pict_type == I_TYPE) */{ - /* I-Frame */ - for(i=0; i<s->mb_stride*s->mb_height; i++) - s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; - - if(!s->fixed_qscale){ - /* finding spatial complexity for I-frame rate 
control */ - for(mb_y=0; mb_y < s->mb_height; mb_y++) { - for(mb_x=0; mb_x < s->mb_width; mb_x++) { - int xx = mb_x * 16; - int yy = mb_y * 16; - uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx; - int varc; - int sum = s->dsp.pix_sum(pix, s->linesize); + s->me.pre_pass=0; - varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; + return 0; +} - s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc; - s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8; - s->current_picture.mb_var_sum += varc; - } - } - } - } - emms_c(); +static int estimate_motion_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; - if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){ - s->pict_type= I_TYPE; - for(i=0; i<s->mb_stride*s->mb_height; i++) - s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; -//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); + s->me.dia_size= s->avctx->dia_size; + s->first_slice_line=1; + for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) { + s->mb_x=0; //for block init below + ff_init_block_index(s); + for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; + + /* compute motion vector & mb_type and store in context */ + if(s->pict_type==B_TYPE) + ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y); + else + ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y); + } + s->first_slice_line=0; } + return 0; +} - if(!s->umvplus){ - if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { - s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); - - if(s->flags & CODEC_FLAG_INTERLACED_ME){ - int a,b; - a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select - b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I); - 
s->f_code= FFMAX(s->f_code, FFMAX(a,b)); - } - - ff_fix_long_p_mvs(s); - ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0); - if(s->flags & CODEC_FLAG_INTERLACED_ME){ - for(i=0; i<2; i++){ - for(j=0; j<2; j++) - ff_fix_long_mvs(s, s->p_field_select_table[i], j, - s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0); - } - } +static void write_slice_end(MpegEncContext *s){ + if(s->codec_id==CODEC_ID_MPEG4){ + if(s->partitioned_frame){ + ff_mpeg4_merge_partitions(s); } - - if(s->pict_type==B_TYPE){ - int a, b; - - a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD); - b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR); - s->f_code = FFMAX(a, b); - - a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD); - b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR); - s->b_code = FFMAX(a, b); - - ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1); - ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1); - ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1); - ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1); - if(s->flags & CODEC_FLAG_INTERLACED_ME){ - int dir; - for(dir=0; dir<2; dir++){ - for(i=0; i<2; i++){ - for(j=0; j<2; j++){ - int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) - : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I); - ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, - s->b_field_mv_table[dir][i][j], dir ? 
s->b_code : s->f_code, type, 1); - } - } - } - } - } - } - if (!s->fixed_qscale) - s->current_picture.quality = ff_rate_estimate_qscale(s); + ff_mpeg4_stuffing(&s->pb); + }else if(s->out_format == FMT_MJPEG){ + ff_mjpeg_stuffing(&s->pb); + } - if(s->adaptive_quant){ -#ifdef CONFIG_RISKY - switch(s->codec_id){ - case CODEC_ID_MPEG4: - ff_clean_mpeg4_qscales(s); - break; - case CODEC_ID_H263: - case CODEC_ID_H263P: - case CODEC_ID_FLV1: - ff_clean_h263_qscales(s); - break; - } -#endif + align_put_bits(&s->pb); + flush_put_bits(&s->pb); +} - s->lambda= s->lambda_table[0]; - //FIXME broken - }else - s->lambda= s->current_picture.quality; -//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality); - update_qscale(s); - - if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) - s->qscale= 3; //reduce cliping problems - - if (s->out_format == FMT_MJPEG) { - /* for mjpeg, we do include qscale in the matrix */ - s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; - for(i=1;i<64;i++){ - int j= s->dsp.idct_permutation[i]; +static int encode_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; + int mb_x, mb_y, pdif = 0; + int i, j; + MpegEncContext best_s, backup_s; + uint8_t bit_buf[2][3000]; + uint8_t bit_buf2[2][3000]; + uint8_t bit_buf_tex[2][3000]; + PutBitContext pb[2], pb2[2], tex_pb[2]; +//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y); - s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); - } - convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, - s->intra_matrix, s->intra_quant_bias, 8, 8); - s->qscale= 8; + for(i=0; i<2; i++){ + init_put_bits(&pb [i], bit_buf [i], 3000); + init_put_bits(&pb2 [i], bit_buf2 [i], 3000); + init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000); } - - //FIXME var duplication - s->current_picture.key_frame= s->pict_type == I_TYPE; - s->current_picture.pict_type= s->pict_type; - - if(s->current_picture.key_frame) - 
s->picture_in_gop_number=0; s->last_bits= put_bits_count(&s->pb); - switch(s->out_format) { - case FMT_MJPEG: - mjpeg_picture_header(s); - break; -#ifdef CONFIG_RISKY - case FMT_H263: - if (s->codec_id == CODEC_ID_WMV2) - ff_wmv2_encode_picture_header(s, picture_number); - else if (s->h263_msmpeg4) - msmpeg4_encode_picture_header(s, picture_number); - else if (s->h263_pred) - mpeg4_encode_picture_header(s, picture_number); - else if (s->codec_id == CODEC_ID_RV10) - rv10_encode_picture_header(s, picture_number); - else if (s->codec_id == CODEC_ID_FLV1) - ff_flv_encode_picture_header(s, picture_number); - else - h263_encode_picture_header(s, picture_number); - break; -#endif - case FMT_MPEG1: - mpeg1_encode_picture_header(s, picture_number); - break; - case FMT_H264: - break; - default: - assert(0); - } - bits= put_bits_count(&s->pb); - s->header_bits= bits - s->last_bits; - s->last_bits= bits; s->mv_bits=0; s->misc_bits=0; s->i_tex_bits=0; @@ -4080,10 +4014,11 @@ static void encode_picture(MpegEncContext *s, int picture_number) #endif s->resync_mb_x=0; - s->resync_mb_y=0; + s->resync_mb_y=0; s->first_slice_line = 1; s->ptr_lastgob = s->pb.buf; - for(mb_y=0; mb_y < s->mb_height; mb_y++) { + for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) { +// printf("row %d at %X\n", s->mb_y, (int)s); s->mb_x=0; s->mb_y= mb_y; @@ -4105,10 +4040,12 @@ static void encode_picture(MpegEncContext *s, int picture_number) if(s->rtp_mode){ int current_packet_size, is_gob_start; - current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob; + current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob; //FIXME wrong is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; + if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1; + switch(s->codec_id){ case CODEC_ID_H263: case CODEC_ID_H263P: @@ -4121,19 +4058,16 @@ static void encode_picture(MpegEncContext *s, int picture_number) if(s->mb_skip_run) is_gob_start=0; break; } - + 
if(is_gob_start){ - if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){ - ff_mpeg4_merge_partitions(s); - ff_mpeg4_init_partitions(s); + if(s->start_mb_y != mb_y || mb_x!=0){ + write_slice_end(s); + + if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){ + ff_mpeg4_init_partitions(s); + } } - if(s->codec_id==CODEC_ID_MPEG4) - ff_mpeg4_stuffing(&s->pb); - - align_put_bits(&s->pb); - flush_put_bits(&s->pb); - assert((put_bits_count(&s->pb)&7) == 0); current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob; @@ -4417,10 +4351,10 @@ static void encode_picture(MpegEncContext *s, int picture_number) ff_h263_update_motion_val(s); #endif - if(next_block==0){ - s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad , s->linesize ,16); - s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8); - s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8); + if(next_block==0){ //FIXME 16 vs linesize16 + s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad , s->linesize ,16); + s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize , s->uvlinesize, 8); + s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8); } if(s->avctx->mb_decision == FF_MB_DECISION_BITS) @@ -4577,26 +4511,286 @@ static void encode_picture(MpegEncContext *s, int picture_number) //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb)); } } - emms_c(); #ifdef CONFIG_RISKY - if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame) - ff_mpeg4_merge_partitions(s); - + //not beautifull here but we must write it before flushing so it has to be here if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE) msmpeg4_encode_ext_header(s); - - if(s->codec_id==CODEC_ID_MPEG4) - ff_mpeg4_stuffing(&s->pb); #endif + write_slice_end(s); + /* Send the last GOB if RTP */ if (s->avctx->rtp_callback) { - flush_put_bits(&s->pb); pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; /* 
Call the RTP callback to send the last GOB */ + emms_c(); s->avctx->rtp_callback(s->ptr_lastgob, pdif, 0); } + + return 0; +} + +#define MERGE(field) dst->field += src->field; src->field=0 +static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){ + MERGE(scene_change_score); + MERGE(mc_mb_var_sum_temp); + MERGE(mb_var_sum_temp); +} + +static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){ + int i; + + MERGE(dct_count[0]); //note, the other dct vars are not part of the context + MERGE(dct_count[1]); + MERGE(mv_bits); + MERGE(header_bits); + MERGE(i_tex_bits); + MERGE(p_tex_bits); + MERGE(i_count); + MERGE(f_count); + MERGE(b_count); + MERGE(skip_count); + MERGE(misc_bits); + MERGE(error_count); + MERGE(padding_bug_score); + + if(dst->avctx->noise_reduction){ + for(i=0; i<64; i++){ + MERGE(dct_error_sum[0][i]); + MERGE(dct_error_sum[1][i]); + } + } + + assert(put_bits_count(&src->pb) % 8 ==0); + assert(put_bits_count(&dst->pb) % 8 ==0); + ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb)); + flush_put_bits(&dst->pb); +} + +static void encode_picture(MpegEncContext *s, int picture_number) +{ + int mb_x, mb_y; + int i, j; + int bits; + + s->picture_number = picture_number; + + /* Reset the average MB variance */ + s->mb_var_sum_temp = + s->mc_mb_var_sum_temp = 0; + +#ifdef CONFIG_RISKY + /* we need to initialize some time vars before we can encode b-frames */ + // RAL: Condition added for MPEG1VIDEO + if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4)) + ff_set_mpeg4_time(s, s->picture_number); +#endif + + s->scene_change_score=0; + + s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... 
stuff for ME ratedistoration + + if(s->pict_type==I_TYPE){ + if(s->msmpeg4_version >= 3) s->no_rounding=1; + else s->no_rounding=0; + }else if(s->pict_type!=B_TYPE){ + if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4) + s->no_rounding ^= 1; + } + + s->mb_intra=0; //for the rate distoration & bit compare functions + for(i=1; i<s->avctx->thread_count; i++){ + update_duplicate_context(s->thread_context[i], s); + } + + /* Estimate motion for every MB */ + if(s->pict_type != I_TYPE){ + if(s->pict_type != B_TYPE){ + if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){ + s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); + } + } + + s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); + for(i=1; i<s->avctx->thread_count; i++){ + merge_context_after_me(s, s->thread_context[i]); + } + }else /* if(s->pict_type == I_TYPE) */{ + /* I-Frame */ + for(i=0; i<s->mb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; + + if(!s->fixed_qscale){ + /* finding spatial complexity for I-frame rate control */ + for(mb_y=0; mb_y < s->mb_height; mb_y++) { + for(mb_x=0; mb_x < s->mb_width; mb_x++) { + int xx = mb_x * 16; + int yy = mb_y * 16; + uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx; + int varc; + int sum = s->dsp.pix_sum(pix, s->linesize); + + varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; + + s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc; + s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8; + s->mb_var_sum_temp += varc; + } + } + } + } + s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->mc_mb_var_sum_temp; + s->current_picture. 
mb_var_sum= s->current_picture_ptr-> mb_var_sum= s-> mb_var_sum_temp; + emms_c(); + + if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){ + s->pict_type= I_TYPE; + for(i=0; i<s->mb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; +//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); + } + + if(!s->umvplus){ + if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { + s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); + + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + int a,b; + a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select + b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I); + s->f_code= FFMAX(s->f_code, FFMAX(a,b)); + } + + ff_fix_long_p_mvs(s); + ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0); + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++) + ff_fix_long_mvs(s, s->p_field_select_table[i], j, + s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0); + } + } + } + + if(s->pict_type==B_TYPE){ + int a, b; + + a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD); + b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR); + s->f_code = FFMAX(a, b); + + a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD); + b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR); + s->b_code = FFMAX(a, b); + + ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1); + if(s->flags & 
CODEC_FLAG_INTERLACED_ME){ + int dir; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++){ + int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) + : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I); + ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, + s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1); + } + } + } + } + } + } + + if (!s->fixed_qscale) + s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr + + if(s->adaptive_quant){ +#ifdef CONFIG_RISKY + switch(s->codec_id){ + case CODEC_ID_MPEG4: + ff_clean_mpeg4_qscales(s); + break; + case CODEC_ID_H263: + case CODEC_ID_H263P: + case CODEC_ID_FLV1: + ff_clean_h263_qscales(s); + break; + } +#endif + + s->lambda= s->lambda_table[0]; + //FIXME broken + }else + s->lambda= s->current_picture.quality; +//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality); + update_qscale(s); + + if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) + s->qscale= 3; //reduce cliping problems + + if (s->out_format == FMT_MJPEG) { + /* for mjpeg, we do include qscale in the matrix */ + s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; + for(i=1;i<64;i++){ + int j= s->dsp.idct_permutation[i]; + + s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); + } + convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, + s->intra_matrix, s->intra_quant_bias, 8, 8); + s->qscale= 8; + } + + //FIXME var duplication + s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr + s->current_picture.pict_type= s->pict_type; + + if(s->current_picture.key_frame) + s->picture_in_gop_number=0; + + s->last_bits= put_bits_count(&s->pb); + switch(s->out_format) { + case FMT_MJPEG: + mjpeg_picture_header(s); + break; +#ifdef CONFIG_RISKY + case FMT_H263: + if (s->codec_id == CODEC_ID_WMV2) + ff_wmv2_encode_picture_header(s, picture_number); + else if 
(s->h263_msmpeg4) + msmpeg4_encode_picture_header(s, picture_number); + else if (s->h263_pred) + mpeg4_encode_picture_header(s, picture_number); + else if (s->codec_id == CODEC_ID_RV10) + rv10_encode_picture_header(s, picture_number); + else if (s->codec_id == CODEC_ID_FLV1) + ff_flv_encode_picture_header(s, picture_number); + else + h263_encode_picture_header(s, picture_number); + break; +#endif + case FMT_MPEG1: + mpeg1_encode_picture_header(s, picture_number); + break; + case FMT_H264: + break; + default: + assert(0); + } + bits= put_bits_count(&s->pb); + s->header_bits= bits - s->last_bits; + + for(i=1; i<s->avctx->thread_count; i++){ + update_duplicate_context_after_me(s->thread_context[i], s); + } + s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); + for(i=1; i<s->avctx->thread_count; i++){ + merge_context_after_encode(s, s->thread_context[i]); + } + emms_c(); } #endif //CONFIG_ENCODERS diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index aeeb8866a6..6eb3a0fcff 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -47,6 +47,8 @@ enum OutputFormat { #define MAX_FCODE 7 #define MAX_MV 2048 +#define MAX_THREADS 8 + #define MAX_PICTURE_COUNT 15 #define ME_MAP_SIZE 64 @@ -285,6 +287,10 @@ typedef struct MpegEncContext { Picture **input_picture; ///< next pictures on display order for encoding Picture **reordered_input_picture; ///< pointer to the next pictures in codedorder for encoding + int start_mb_y; ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y) + int end_mb_y; ///< end mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y) + struct MpegEncContext *thread_context[MAX_THREADS]; + /** * copy of the previous picture structure. 
* note, linesize & data, might not match the previous picture (for field pictures) @@ -332,7 +338,10 @@ typedef struct MpegEncContext { uint8_t *cbp_table; ///< used to store cbp, ac_pred for partitioned decoding uint8_t *pred_dir_table; ///< used to store pred_dir for partitioned decoding uint8_t *allocated_edge_emu_buffer; - uint8_t *edge_emu_buffer; ///< points into the middle of allocated_edge_emu_buffer + uint8_t *edge_emu_buffer; ///< points into the middle of allocated_edge_emu_buffer + uint8_t *rd_scratchpad; ///< scartchpad for rate distortion mb decission + uint8_t *obmc_scratchpad; + uint8_t *b_scratchpad; ///< scratchpad used for writing into write only buffers int qscale; ///< QP int chroma_qscale; ///< chroma QP @@ -487,6 +496,10 @@ typedef struct MpegEncContext { int misc_bits; ///< cbp, mb_type int last_bits; ///< temp var used for calculating the above vars + /* temp variables for picture complexity calculation */ + int mc_mb_var_sum_temp; + int mb_var_sum_temp; + /* error concealment / resync */ int error_count; uint8_t *error_status_table; ///< table of the error status of each MB @@ -565,9 +578,6 @@ typedef struct MpegEncContext { int intra_dc_threshold; ///< QP above whch the ac VLC should be used for intra dc PutBitContext tex_pb; ///< used for data partitioned VOPs PutBitContext pb2; ///< used for data partitioned VOPs -#define PB_BUFFER_SIZE 1024*256 - uint8_t *tex_pb_buffer; - uint8_t *pb2_buffer; int mpeg_quant; int t_frame; ///< time distance of first I -> B, used for interlaced b frames int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG4 @@ -908,6 +918,7 @@ void mjpeg_encode_mb(MpegEncContext *s, DCTELEM block[6][64]); void mjpeg_picture_header(MpegEncContext *s); void mjpeg_picture_trailer(MpegEncContext *s); +void ff_mjpeg_stuffing(PutBitContext * pbc); /* rate control */ diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c new file mode 100644 index 0000000000..6f98dc41f4 --- /dev/null +++ 
b/libavcodec/pthread.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include <semaphore.h> +#include <pthread.h> + +//#define DEBUG + +#include "avcodec.h" +#include "common.h" + + +typedef struct ThreadContext{ + AVCodecContext *avctx; + pthread_t thread; + sem_t work_sem; + sem_t done_sem; + int (*func)(AVCodecContext *c, void *arg); + void *arg; + int ret; +}ThreadContext; + +static void * thread_func(void *v){ + ThreadContext *c= v; + + for(;;){ +//printf("thread_func %X enter wait\n", (int)v); fflush(stdout); + sem_wait(&c->work_sem); +//printf("thread_func %X after wait (func=%X)\n", (int)v, (int)c->func); fflush(stdout); + if(c->func) + c->ret= c->func(c->avctx, c->arg); + else + return NULL; +//printf("thread_func %X signal complete\n", (int)v); fflush(stdout); + sem_post(&c->done_sem); + } + + return NULL; +} + +/** + * free what has been allocated by avcodec_pthread_init(). 
+ * must be called after decoding has finished, especially dont call while avcodec_pthread_execute() is running + */ +void avcodec_pthread_free(AVCodecContext *s){ + ThreadContext *c= s->thread_opaque; + int i; + + for(i=0; i<s->thread_count; i++){ + int val; + + sem_getvalue(&c[i].work_sem, &val); assert(val == 0); + sem_getvalue(&c[i].done_sem, &val); assert(val == 0); + + c[i].func= NULL; + sem_post(&c[i].work_sem); + pthread_join(c[i].thread, NULL); + sem_destroy(&c[i].work_sem); + sem_destroy(&c[i].done_sem); + } + + av_freep(&s->thread_opaque); +} + +int avcodec_pthread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){ + ThreadContext *c= s->thread_opaque; + int i, val; + + assert(s == c->avctx); + assert(count <= s->thread_count); + + /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */ + + for(i=0; i<count; i++){ + sem_getvalue(&c[i].work_sem, &val); assert(val == 0); + sem_getvalue(&c[i].done_sem, &val); assert(val == 0); + + c[i].arg= arg[i]; + c[i].func= func; + c[i].ret= 12345; + sem_post(&c[i].work_sem); + } + for(i=0; i<count; i++){ + sem_wait(&c[i].done_sem); + + sem_getvalue(&c[i].work_sem, &val); assert(val == 0); + sem_getvalue(&c[i].done_sem, &val); assert(val == 0); + + c[i].func= NULL; + if(ret) ret[i]= c[i].ret; + } + return 0; +} + +int avcodec_pthread_init(AVCodecContext *s, int thread_count){ + int i; + ThreadContext *c; + + s->thread_count= thread_count; + + assert(!s->thread_opaque); + c= av_mallocz(sizeof(ThreadContext)*thread_count); + s->thread_opaque= c; + + for(i=0; i<thread_count; i++){ +//printf("init semaphors %d\n", i); fflush(stdout); + c[i].avctx= s; + if(sem_init(&c[i].work_sem, 0, 0)) + goto fail; + if(sem_init(&c[i].done_sem, 0, 0)) + goto fail; +//printf("create thread %d\n", i); fflush(stdout); + if(pthread_create(&c[i].thread, NULL, thread_func, &c[i])) + goto fail; + } +//printf("init done\n"); 
fflush(stdout); + + s->execute= avcodec_pthread_execute; + + return 0; +fail: + avcodec_pthread_free(s); + return -1; +} diff --git a/libavcodec/utils.c b/libavcodec/utils.c index d04c2faaf0..e49ddd07f0 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -324,6 +324,16 @@ int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic){ return 0; } +int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){ + int i; + + for(i=0; i<count; i++){ + int r= func(c, arg[i]); + if(ret) ret[i]= r; + } + return 0; +} + enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, enum PixelFormat * fmt){ return fmt[0]; } @@ -352,6 +362,8 @@ void avcodec_get_context_defaults(AVCodecContext *s){ s->get_buffer= avcodec_default_get_buffer; s->release_buffer= avcodec_default_release_buffer; s->get_format= avcodec_default_get_format; + s->execute= avcodec_default_execute; + s->thread_count=1; s->me_subpel_quality=8; s->lmin= FF_QP2LAMBDA * s->qmin; s->lmax= FF_QP2LAMBDA * s->qmax; diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref index cfbb2469ac..82d65e3bf8 100644 --- a/tests/ffmpeg.regression.ref +++ b/tests/ffmpeg.regression.ref @@ -8,6 +8,9 @@ stddev: 7.65 PSNR:30.44 bytes:7602176 13336cffcba456ff4a7607b2a7e57b33 *./data/a-mpeg2i.mpg 4c9701eb83ed81dd9a328af83d7d7c8a *./data/out.yuv stddev: 7.66 PSNR:30.43 bytes:7602176 +8c4a7744f40a1e7aa16b985ecaad176a *./data/a-mpeg2thread.mpg +12ab090b699c130e5aef8e050965f092 *./data/out.yuv +stddev: 9.44 PSNR:28.62 bytes:7299072 d0dc46dd831398237a690ebbeff18b64 *./data/a-msmpeg4v2.avi 712aa6c959d1d90a78fe98657cbff19c *./data/out.yuv stddev: 8.11 PSNR:29.94 bytes:7602176 @@ -38,10 +41,13 @@ stddev: 10.18 PSNR:27.96 bytes:7145472 64b4b917014169294d59fe43ad6b3da9 *./data/a-mpeg4-adv.avi 8069deacba9756fd25ad37b467eb6365 *./data/out.yuv stddev: 10.23 PSNR:27.92 bytes:7602176 +96453d489d5418e382824cfb2673ac58 *./data/a-mpeg4-thread.avi 
+17ec2d72186dbb72d8a79cd448796cef *./data/out.yuv +stddev: 12.09 PSNR:26.47 bytes:7145472 f863f4198521bd76930ea33991b47273 *./data/a-error-mpeg4-adv.avi ba7fcd126c7c9fead5a5de71aaaf0624 *./data/out.yuv stddev: 16.80 PSNR:23.61 bytes:7602176 -198ad515da4f330d780c54fd8d6186ab *./data/a-error-mpeg4-nr.avi +198ad515da4f330d780c54fd8d6186ab *./data/a-mpeg4-nr.avi ebdb326e19aeab8e3c70d7050dc3b240 *./data/out.yuv stddev: 7.02 PSNR:31.19 bytes:7602176 328ebd044362116e274739e23c482ee7 *./data/a-mpeg1b.mpg diff --git a/tests/regression.sh b/tests/regression.sh index d7883136e9..69e8a849cb 100755 --- a/tests/regression.sh +++ b/tests/regression.sh @@ -33,6 +33,7 @@ elif [ "$1" = "libavtest" ] ; then else do_mpeg=y do_mpeg2=y + do_mpeg2thread=y do_msmpeg4v2=y do_msmpeg4=y do_wmv1=y @@ -51,6 +52,7 @@ else do_adpcm_ms=y do_rc=y do_mpeg4adv=y + do_mpeg4thread=y do_mpeg4nr=y do_mpeg1b=y do_asv1=y @@ -149,6 +151,16 @@ do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst fi ################################### +if [ -n "$do_mpeg2thread" ] ; then +# mpeg2 encoding interlaced +file=${outfile}mpeg2thread.mpg +do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -bf 2 -ildct -ilme -threads 2 $file + +# mpeg2 decoding +do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst +fi + +################################### if [ -n "$do_msmpeg4v2" ] ; then # msmpeg4 encoding file=${outfile}msmpeg4v2.avi @@ -249,6 +261,16 @@ do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst fi ################################### +if [ -n "$do_mpeg4thread" ] ; then +# mpeg4 +file=${outfile}mpeg4-thread.avi +do_ffmpeg $file -y -b 500 -4mv -hq -part -ps 200 -aic -trell -bf 2 -f pgmyuv -i $raw_src -an -vcodec mpeg4 -threads 2 $file + +# mpeg4 decoding +do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst +fi + +################################### if [ -n "$do_error" ] ; then # damaged mpeg4 file=${outfile}error-mpeg4-adv.avi @@ -261,7 +283,7 @@ fi ################################### if [ -n 
"$do_mpeg4nr" ] ; then # noise reduction -file=${outfile}error-mpeg4-nr.avi +file=${outfile}mpeg4-nr.avi do_ffmpeg $file -y -qscale 8 -4mv -mbd 2 -nr 200 -f pgmyuv -i $raw_src -an -vcodec mpeg4 $file # mpeg4 decoding diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref index 9a2729ca79..d67aa7916a 100644 --- a/tests/rotozoom.regression.ref +++ b/tests/rotozoom.regression.ref @@ -8,6 +8,9 @@ stddev: 4.95 PSNR:34.22 bytes:7602176 6da01fd0d910fbfcdc5b212ef3dd65cb *./data/a-mpeg2i.mpg 1e21fd7ed53abf352f9ea8548afa80a3 *./data/out.yuv stddev: 4.96 PSNR:34.20 bytes:7602176 +a3dd9c2911c9556d377ab1465f7365b4 *./data/a-mpeg2thread.mpg +8cf98fa5c59c959e35389a1a7180b379 *./data/out.yuv +stddev: 5.55 PSNR:33.22 bytes:7299072 14db391f167b52b21a983157b410affc *./data/a-msmpeg4v2.avi fc8881e0904af9491d5fa0163183954b *./data/out.yuv stddev: 5.29 PSNR:33.64 bytes:7602176 @@ -38,10 +41,13 @@ stddev: 4.20 PSNR:35.64 bytes:7145472 accf60d11aceecabb3c1997aec6e18b5 *./data/a-mpeg4-adv.avi a287b07b812fbeeb5364517303178ac7 *./data/out.yuv stddev: 4.77 PSNR:34.54 bytes:7602176 +8750b3935266211fea6b062f445bb305 *./data/a-mpeg4-thread.avi +aee1867b77490b3f8d58fcc9b7c5b535 *./data/out.yuv +stddev: 3.92 PSNR:36.25 bytes:7145472 03ff35856faefb4882eaf4d86d95bea7 *./data/a-error-mpeg4-adv.avi 8550acff0851ee915bd5800f1e20f37c *./data/out.yuv stddev: 9.66 PSNR:28.42 bytes:7602176 -74dbbba19d250a712702b1893c003461 *./data/a-error-mpeg4-nr.avi +74dbbba19d250a712702b1893c003461 *./data/a-mpeg4-nr.avi 81b985840c03bf101302abde131e3900 *./data/out.yuv stddev: 4.67 PSNR:34.73 bytes:7602176 671802a2c5078e69f7f422765ea87f2a *./data/a-mpeg1b.mpg |