Merge commit '1a9f9f8' into oldabi

* commit '1a9f9f8': (98 commits) Do not drop packets with no valid ->pos set as e.g. DV-in-AVI produces. FFMPEG: support demuxer specific options. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> AVIDEC: use_odmc demuxer specific option. (mostly an exmaple for demuxer specific options) Signed-off-by: Michael Niedermayer <michaelni@gmx.at> LAVFAPI: demuxer specific options. (someone please add doxy) Signed-off-by: Michael Niedermayer <michaelni@gmx.at> output_example: use avformat_alloc_output_context() Signed-off-by: Michael Niedermayer <michaelni@gmx.at> LAVFAPI: avformat_alloc_output_context() / simplify usage of muxers. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> LAVF API: remove AVOutputFormat.set_parameters() the field is unused. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> CrystalHD: Add auto-detection of packed b-frame bug. lavc: remove disabled avcodec_decode_video() code Read the album_artist, grouping and lyrics metadata. In libx264 wrapper, change wpredp to a codec specific option. AMV: disable DR1 and don't override EMU_EDGE lavf: inspect more frames for fps when container time base is coarse Fix races in default av_log handler flashsv2enc: regression test. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> vorbis: Replace sized int_fast integer types with plain int/unsigned. Remove disabled non-optimized code variants. bswap.h: Remove disabled code. Remove some disabled printf debug cruft. Replace more disabled printf() calls by av_dlog(). ... Conflicts: libavcodec/options.c libavcodec/qpeg.c libavfilter/avfilter.h libavformat/avformat.h Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2011-05-02 04:18:04 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-05-02 04:18:04 +0200
commit: d4b98d475f8abf9f05ebe91b3fce341aa4b902ee (patch)
tree: bc452a41fbc304d19ee058da006e9760cb160d6a /libavcodec
parent: 8d8962ca3e95b1ce86ab505498e7cd3ec89fa996 (diff)
parent: 1a9f9f81b1244b952126bb65bc741b04d3534f81 (diff)
download: ffmpeg-d4b98d475f8abf9f05ebe91b3fce341aa4b902ee.tar.gz
58 files changed, 1451 insertions, 759 deletions
diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index 97436ce187..3a42642514 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -773,12 +773,9 @@ static int decode_frame(AVCodecContext *avctx,
 
     avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management
 
-    if(p->data[0])
-        avctx->release_buffer(avctx, p);
-
     p->reference= 1;
-    if(avctx->get_buffer(avctx, p) < 0){
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if (avctx->reget_buffer(avctx, p) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
         return -1;
     }
 
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 9378d2804a..4e693cc251 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -146,6 +146,7 @@ OBJS-$(CONFIG_FLAC_DECODER)            += flacdec.o flacdata.o flac.o vorbis_dat
 OBJS-$(CONFIG_FLAC_ENCODER)            += flacenc.o flacdata.o flac.o
 OBJS-$(CONFIG_FLASHSV_DECODER)         += flashsv.o
 OBJS-$(CONFIG_FLASHSV_ENCODER)         += flashsvenc.o
+OBJS-$(CONFIG_FLASHSV2_ENCODER)        += flashsv2enc.o
 OBJS-$(CONFIG_FLIC_DECODER)            += flicvideo.o
 OBJS-$(CONFIG_FOURXM_DECODER)          += 4xm.o
 OBJS-$(CONFIG_FRAPS_DECODER)           += fraps.o
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 54aab6e0bc..5cc85dd613 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -130,6 +130,7 @@ typedef struct {
 #define SCALE_MAX_POS   255    ///< scalefactor index maximum value
 #define SCALE_MAX_DIFF   60    ///< maximum scalefactor difference allowed by standard
 #define SCALE_DIFF_ZERO  60    ///< codebook index corresponding to zero scalefactor indices difference
+#define POW_SF2_ZERO    200    ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0);
 
 /**
  * Long Term Prediction
@@ -292,8 +293,6 @@ typedef struct {
      * @{
      */
     float *output_data[MAX_CHANNELS];                 ///< Points to each element's 'ret' buffer (PCM output).
-    float sf_scale;                                   ///< Pre-scale for correct IMDCT and dsp.float_to_int16.
-    int sf_offset;                                    ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16
     /** @} */
 
     DECLARE_ALIGNED(32, float, temp)[128];
diff --git a/libavcodec/aac_tablegen.h b/libavcodec/aac_tablegen.h
index 3a820ba673..27fa0e7ba3 100644
--- a/libavcodec/aac_tablegen.h
+++ b/libavcodec/aac_tablegen.h
@@ -29,13 +29,14 @@
 #include "libavcodec/aac_tables.h"
 #else
 #include "libavutil/mathematics.h"
+#include "libavcodec/aac.h"
 float ff_aac_pow2sf_tab[428];
 
 void ff_aac_tableinit(void)
 {
     int i;
     for (i = 0; i < 428; i++)
-        ff_aac_pow2sf_tab[i] = pow(2, (i - 200) / 4.);
+        ff_aac_pow2sf_tab[i] = pow(2, (i - POW_SF2_ZERO) / 4.);
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 35b31c708a..6d55acbc45 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -109,8 +109,8 @@ static av_always_inline float quantize_and_encode_band_cost_template(
                                 int *bits, int BT_ZERO, int BT_UNSIGNED,
                                 int BT_PAIR, int BT_ESC)
 {
-    const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
-    const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ = ff_aac_pow2sf_tab[POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
+    const float  Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float CLIPPED_ESCAPE = 165140.0f*IQ;
     int i, j;
     float cost = 0;
@@ -281,7 +281,7 @@ static float find_max_val(int group_len, int swb_size, const float *scaled) {
 }
 
 static int find_min_book(float maxval, int sf) {
-    float Q = ff_aac_pow2sf_tab[200 - sf + SCALE_ONE_POS - SCALE_DIV_512];
+    float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
     float Q34 = sqrtf(Q * sqrtf(Q));
     int qmaxval, cb;
     qmaxval = maxval * Q34 + 0.4054f;
@@ -956,7 +956,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
                     dist -= b;
                 }
                 dist *= 1.0f / 512.0f / lambda;
-                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
+                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]);
                 if (quant_max >= 8191) { // too much, return to the previous quantizer
                     sce->sf_idx[w*16+g] = prev_scf;
                     break;
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 96b1323c19..76b14a194c 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -579,12 +579,6 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
 
     ac->random_state = 0x1f2e3d4c;
 
-    // -1024 - Compensate wrong IMDCT method.
-    // 60    - Required to scale values to the correct range [-32768,32767]
-    //         for float to int16 conversion. (1 << (60 / 4)) == 32768
-    ac->sf_scale  = 1. / -1024.;
-    ac->sf_offset = 60;
-
     ff_aac_tableinit();
 
     INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
@@ -592,9 +586,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                     352);
 
-    ff_mdct_init(&ac->mdct,       11, 1, 1.0);
-    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0);
-    ff_mdct_init(&ac->mdct_ltp,   11, 0, 1.0);
+    ff_mdct_init(&ac->mdct,       11, 1, 1.0/1024.0);
+    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0/128.0);
+    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0);
     // window initialization
     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
@@ -652,7 +646,7 @@ static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
     int sfb;
 
     ltp->lag  = get_bits(gb, 11);
-    ltp->coef = ltp_coef[get_bits(gb, 3)] * ac->sf_scale;
+    ltp->coef = ltp_coef[get_bits(gb, 3)];
     for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
         ltp->used[sfb] = get_bits1(gb);
 }
@@ -790,9 +784,9 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                                enum BandType band_type[120],
                                int band_type_run_end[120])
 {
-    const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
     int g, i, idx = 0;
-    int offset[3] = { global_gain, global_gain - 90, 100 };
+    int offset[3] = { global_gain, global_gain - 90, 0 };
+    int clipped_offset;
     int noise_flag = 1;
     static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
     for (g = 0; g < ics->num_window_groups; g++) {
@@ -804,12 +798,14 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
             } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
                 for (; i < run_end; i++, idx++) {
                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
-                    if (offset[2] > 255U) {
-                        av_log(ac->avctx, AV_LOG_ERROR,
-                               "%s (%d) out of range.\n", sf_str[2], offset[2]);
-                        return -1;
+                    clipped_offset = av_clip(offset[2], -155, 100);
+                    if (offset[2] != clipped_offset) {
+                        av_log_ask_for_sample(ac->avctx, "Intensity stereo "
+                                "position clipped (%d -> %d).\nIf you heard an "
+                                "audible artifact, there may be a bug in the "
+                                "decoder. ", offset[2], clipped_offset);
                     }
-                    sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
+                    sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
                 }
             } else if (band_type[idx] == NOISE_BT) {
                 for (; i < run_end; i++, idx++) {
@@ -817,12 +813,14 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                         offset[1] += get_bits(gb, 9) - 256;
                     else
                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
-                    if (offset[1] > 255U) {
-                        av_log(ac->avctx, AV_LOG_ERROR,
-                               "%s (%d) out of range.\n", sf_str[1], offset[1]);
-                        return -1;
+                    clipped_offset = av_clip(offset[1], -100, 155);
+                    if (offset[2] != clipped_offset) {
+                        av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
+                                "(%d -> %d).\nIf you heard an audible "
+                                "artifact, there may be a bug in the decoder. ",
+                                offset[1], clipped_offset);
                     }
-                    sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
+                    sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
                 }
             } else {
                 for (; i < run_end; i++, idx++) {
@@ -832,7 +830,7 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                                "%s (%d) out of range.\n", sf_str[0], offset[0]);
                         return -1;
                     }
-                    sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
+                    sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
                 }
             }
         }
@@ -1243,7 +1241,6 @@ static av_always_inline float flt16_trunc(float pf)
 }
 
 static av_always_inline void predict(PredictorState *ps, float *coef,
-                                     float sf_scale, float inv_sf_scale,
                                      int output_enable)
 {
     const float a     = 0.953125; // 61.0 / 64
@@ -1260,9 +1257,9 @@ static av_always_inline void predict(PredictorState *ps, float *coef,
 
     pv = flt16_round(k1 * r0 + k2 * r1);
     if (output_enable)
-        *coef += pv * sf_scale;
+        *coef += pv;
 
-    e0 = *coef * inv_sf_scale;
+    e0 = *coef;
     e1 = e0 - k1 * r0;
 
     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
@@ -1280,7 +1277,6 @@ static av_always_inline void predict(PredictorState *ps, float *coef,
 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
 {
     int sfb, k;
-    float sf_scale = ac->sf_scale, inv_sf_scale = 1 / ac->sf_scale;
 
     if (!sce->ics.predictor_initialized) {
         reset_all_predictors(sce->predictor_state);
@@ -1291,7 +1287,6 @@ static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
         for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
                 predict(&sce->predictor_state[k], &sce->coeffs[k],
-                        sf_scale, inv_sf_scale,
                         sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
             }
         }
diff --git a/libavcodec/aacdectab.h b/libavcodec/aacdectab.h
index 0bccb84cb7..22ae00ff32 100644
--- a/libavcodec/aacdectab.h
+++ b/libavcodec/aacdectab.h
@@ -36,11 +36,11 @@
 #include <stdint.h>
 
 /* @name ltp_coef
- * Table of the LTP coefficient (multiplied by 2)
+ * Table of the LTP coefficients
  */
 static const float ltp_coef[8] = {
-     1.141658,    1.393232,    1.626008,    1.822608,
-     1.969800,    2.135788,    2.2389202,   2.739066,
+    0.570829, 0.696616, 0.813004, 0.911304,
+    0.984900, 1.067894, 1.194601, 1.369533,
 };
 
 /* @name tns_tmp2_map
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index b0b4075a36..62a4ba161a 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1962,8 +1962,6 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels,
     ch_layout = *channel_layout;
     if (!ch_layout)
         ch_layout = avcodec_guess_channel_layout(channels, CODEC_ID_AC3, NULL);
-    if (av_get_channel_layout_nb_channels(ch_layout) != channels)
-        return AVERROR(EINVAL);
 
     s->lfe_on       = !!(ch_layout & AV_CH_LOW_FREQUENCY);
     s->channels     = channels;
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 3e7260ede2..1fa92215d5 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -108,6 +108,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC  (FFV1, ffv1);
     REGISTER_ENCDEC  (FFVHUFF, ffvhuff);
     REGISTER_ENCDEC  (FLASHSV, flashsv);
+    REGISTER_ENCODER (FLASHSV2, flashsv2);
     REGISTER_DECODER (FLIC, flic);
     REGISTER_ENCDEC  (FLV, flv);
     REGISTER_DECODER (FOURXM, fourxm);
diff --git a/libavcodec/alpha/simple_idct_alpha.c b/libavcodec/alpha/simple_idct_alpha.c
index 0c0689ae73..7f396bfe5f 100644
--- a/libavcodec/alpha/simple_idct_alpha.c
+++ b/libavcodec/alpha/simple_idct_alpha.c
@@ -33,13 +33,13 @@
 // cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
 // W4 is actually exactly 16384, but using 16383 works around
 // accumulating rounding errors for some encoders
-#define W1 ((int_fast32_t) 22725)
-#define W2 ((int_fast32_t) 21407)
-#define W3 ((int_fast32_t) 19266)
-#define W4 ((int_fast32_t) 16383)
-#define W5 ((int_fast32_t) 12873)
-#define W6 ((int_fast32_t)  8867)
-#define W7 ((int_fast32_t)  4520)
+#define W1 22725
+#define W2 21407
+#define W3 19266
+#define W4 16383
+#define W5 12873
+#define W6  8867
+#define W7  4520
 #define ROW_SHIFT 11
 #define COL_SHIFT 20
 
diff --git a/libavcodec/asv1.c b/libavcodec/asv1.c
index d2005fff60..a0dc821367 100644
--- a/libavcodec/asv1.c
+++ b/libavcodec/asv1.c
@@ -450,17 +450,6 @@ static int decode_frame(AVCodecContext *avctx,
             idct_put(a, mb_x, mb_y);
         }
     }
-#if 0
-int i;
-printf("%d %d\n", 8*buf_size, get_bits_count(&a->gb));
-for(i=get_bits_count(&a->gb); i<8*buf_size; i++){
-    printf("%d", get_bits1(&a->gb));
-}
-
-for(i=0; i<s->avctx->extradata_size; i++){
-    printf("%c\n", ((uint8_t*)s->avctx->extradata)[i]);
-}
-#endif
 
     *picture= *(AVFrame*)&a->picture;
     *data_size = sizeof(AVPicture);
diff --git a/libavcodec/bmp.c b/libavcodec/bmp.c
index c02aac6cb7..f4a6b769c5 100644
--- a/libavcodec/bmp.c
+++ b/libavcodec/bmp.c
@@ -245,7 +245,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
         buf = buf0 + 14 + ihsize; //palette location
         if((hsize-ihsize-14) < (colors << 2)){ // OS/2 bitmap, 3 bytes per palette entry
             for(i = 0; i < colors; i++)
-                ((uint32_t*)p->data[1])[i] = bytestream_get_le24(&buf);
+                ((uint32_t*)p->data[1])[i] = (0xff<<24) | bytestream_get_le24(&buf);
         }else{
             for(i = 0; i < colors; i++)
                 ((uint32_t*)p->data[1])[i] = bytestream_get_le32(&buf);
diff --git a/libavcodec/cdgraphics.c b/libavcodec/cdgraphics.c
index 68f556b3f3..aae7bbbb1b 100644
--- a/libavcodec/cdgraphics.c
+++ b/libavcodec/cdgraphics.c
@@ -377,6 +377,5 @@ AVCodec ff_cdgraphics_decoder = {
     cdg_decode_end,
     cdg_decode_frame,
     CODEC_CAP_DR1,
-    .max_lowres = 5,
     .long_name = NULL_IF_CONFIG_SMALL("CD Graphics video"),
 };
diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c
index 1a2d60c672..3c8021748f 100644
--- a/libavcodec/crystalhd.c
+++ b/libavcodec/crystalhd.c
@@ -142,6 +142,7 @@ typedef struct {
 
     /* Options */
     uint32_t sWidth;
+    uint8_t bframe_bug;
 } CHDContext;
 
 static const AVOption options[] = {
@@ -744,7 +745,7 @@ static inline CopyRet receive_frame(AVCodecContext *avctx,
             }
 
             if (avctx->codec->id == CODEC_ID_MPEG4 &&
-                output.PicInfo.timeStamp == 0) {
+                output.PicInfo.timeStamp == 0 && priv->bframe_bug) {
                 av_log(avctx, AV_LOG_VERBOSE,
                        "CrystalHD: Not returning packed frame twice.\n");
                 priv->last_picture++;
@@ -810,6 +811,22 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size, AVPacket *a
 
     av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: decode_frame\n");
 
+    if (avpkt->size == 7 && !priv->bframe_bug) {
+        /*
+         * The use of a drop frame triggers the bug
+         */
+        av_log(avctx, AV_LOG_INFO,
+               "CrystalHD: Enabling work-around for packed b-frame bug\n");
+        priv->bframe_bug = 1;
+    } else if (avpkt->size == 8 && priv->bframe_bug) {
+        /*
+         * Delay frames don't trigger the bug
+         */
+        av_log(avctx, AV_LOG_INFO,
+               "CrystalHD: Disabling work-around for packed b-frame bug\n");
+        priv->bframe_bug = 0;
+    }
+
     if (len) {
         int32_t tx_free = (int32_t)DtsTxFreeSize(dev);
 
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 90e6440a87..162458a7b5 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -38,7 +38,6 @@
 #include "config.h"
 #include "ac3dec.h"
 #include "vorbis.h"
-#include "png.h"
 
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
@@ -1924,17 +1923,6 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
         dst[i+0] += src[i+0];
 }
 
-static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
-    long i;
-    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
-        long a = *(long*)(src1+i);
-        long b = *(long*)(src2+i);
-        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
-    }
-    for(; i<w; i++)
-        dst[i] = src1[i]+src2[i];
-}
-
 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
     long i;
 #if !HAVE_FAST_UNALIGNED
@@ -2112,13 +2100,6 @@ static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t
             +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
             +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
     }
-#if 0
-static int maxi=0;
-if(sum>maxi){
-    maxi=sum;
-    printf("MAX:%d\n", maxi);
-}
-#endif
     return sum;
 }
 
@@ -3087,7 +3068,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
 
     c->add_bytes= add_bytes_c;
-    c->add_bytes_l2= add_bytes_l2_c;
     c->diff_bytes= diff_bytes_c;
     c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
@@ -3095,9 +3075,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
     c->bswap_buf= bswap_buf;
     c->bswap16_buf = bswap16_buf;
-#if CONFIG_PNG_DECODER
-    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
-#endif
 
     if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
         c->h263_h_loop_filter= h263_h_loop_filter_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index d2e64b0e97..02a8a1a3a5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -379,7 +379,6 @@ typedef struct DSPContext {
 
     /* huffyuv specific */
     void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
-    void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
     void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
     /**
      * subtract huffyuv's variant of median prediction
@@ -390,7 +389,6 @@ typedef struct DSPContext {
     int  (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left);
     void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha);
     /* this might write to dst[w] */
-    void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
     void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
     void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
 
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index ed31582199..dc015b9f6a 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -786,6 +786,7 @@ void ff_er_frame_end(MpegEncContext *s){
         }
     }
 
+#if 1
     /* handle overlapping slices */
     for(error_type=1; error_type<=3; error_type++){
         int end_ok=0;
@@ -806,7 +807,8 @@ void ff_er_frame_end(MpegEncContext *s){
                 end_ok=0;
         }
     }
-
+#endif
+#if 1
     /* handle slices with partitions of different length */
     if(s->partitioned_frame){
         int end_ok=0;
@@ -827,7 +829,7 @@ void ff_er_frame_end(MpegEncContext *s){
                 end_ok=0;
         }
     }
-
+#endif
     /* handle missing slices */
     if(s->error_recognition>=4){
         int end_ok=1;
@@ -851,6 +853,7 @@ void ff_er_frame_end(MpegEncContext *s){
         }
     }
 
+#if 1
     /* backward mark errors */
     distance=9999999;
     for(error_type=1; error_type<=3; error_type++){
@@ -875,6 +878,7 @@ void ff_er_frame_end(MpegEncContext *s){
                 distance= 9999999;
         }
     }
+#endif
 
     /* forward mark errors */
     error=0;
@@ -889,7 +893,7 @@ void ff_er_frame_end(MpegEncContext *s){
             s->error_status_table[mb_xy]|= error;
         }
     }
-
+#if 1
     /* handle not partitioned case */
     if(!s->partitioned_frame){
         for(i=0; i<s->mb_num; i++){
@@ -900,6 +904,7 @@ void ff_er_frame_end(MpegEncContext *s){
             s->error_status_table[mb_xy]= error;
         }
     }
+#endif
 
     dc_error= ac_error= mv_error=0;
     for(i=0; i<s->mb_num; i++){
@@ -1060,15 +1065,16 @@ void ff_er_frame_end(MpegEncContext *s){
             s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3;
         }
     }
-
+#if 1
     /* guess DC for damaged blocks */
     guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1);
     guess_dc(s, s->dc_val[1], s->mb_width  , s->mb_height  , s->mb_stride, 0);
     guess_dc(s, s->dc_val[2], s->mb_width  , s->mb_height  , s->mb_stride, 0);
-
+#endif
     /* filter luma DC */
     filter181(s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride);
 
+#if 1
     /* render DC only intra */
     for(mb_y=0; mb_y<s->mb_height; mb_y++){
         for(mb_x=0; mb_x<s->mb_width; mb_x++){
@@ -1088,6 +1094,7 @@ void ff_er_frame_end(MpegEncContext *s){
             put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
         }
     }
+#endif
 
     if(s->avctx->error_concealment&FF_EC_DEBLOCK){
         /* filter horizontal block boundaries */
diff --git a/libavcodec/flac.c b/libavcodec/flac.c
index 6e94c2c5ff..5ed3ef7fc3 100644
--- a/libavcodec/flac.c
+++ b/libavcodec/flac.c
@@ -60,6 +60,7 @@ int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
         fi->ch_mode = FLAC_CHMODE_INDEPENDENT;
     } else if (fi->ch_mode <= FLAC_CHMODE_MID_SIDE) {
         fi->channels = 2;
+        avctx->channel_layout = AV_CH_LAYOUT_STEREO;
     } else {
         av_log(avctx, AV_LOG_ERROR + log_level_offset,
                "invalid channel mode: %d\n", fi->ch_mode);
diff --git a/libavcodec/flashsv2enc.c b/libavcodec/flashsv2enc.c
new file mode 100644
index 0000000000..7ee299cf7c
--- /dev/null
+++ b/libavcodec/flashsv2enc.c
@@ -0,0 +1,907 @@
+/*
+ * Flash Screen Video Version 2 encoder
+ * Copyright (C) 2009 Joshua Warner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/flashsv2enc.c
+ * Flash Screen Video Version 2 encoder
+ * @author Joshua Warner
+ */
+
+/* Differences from version 1 stream:
+ * NOTE: Currently, the only player that supports version 2 streams is Adobe Flash Player itself.
+ * * Supports sending only a range of scanlines in a block,
+ *   indicating a difference from the corresponding block in the last keyframe.
+ * * Supports initializing the zlib dictionary with data from the corresponding
+ *   block in the last keyframe, to improve compression.
+ * * Supports a hybrid 15-bit rgb / 7-bit palette color space.
+ */
+
+/* TODO:
+ * Don't keep Block structures for both current frame and keyframe.
+ * Make better heuristics for deciding stream parameters (optimum_* functions).  Currently these return constants.
+ * Figure out how to encode palette information in the stream, choose an optimum palette at each keyframe.
+ * Figure out how the zlibPrimeCompressCurrent flag works, implement support.
+ * Find other sample files (that weren't generated here), develop a decoder.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <zlib.h>
+
+#include "libavutil/imgutils.h"
+#include "avcodec.h"
+#include "put_bits.h"
+#include "bytestream.h"
+
+#define HAS_IFRAME_IMAGE 0x02
+#define HAS_PALLET_INFO 0x01
+
+#define COLORSPACE_BGR 0x00
+#define COLORSPACE_15_7 0x10
+#define HAS_DIFF_BLOCKS 0x04
+#define ZLIB_PRIME_COMPRESS_CURRENT 0x02
+#define ZLIB_PRIME_COMPRESS_PREVIOUS 0x01
+
+// Disables experimental "smart" parameter-choosing code, as well as the statistics that it depends on.
+// At the moment, the "smart" code is a great example of how the parameters *shouldn't* be chosen.
+#define FLASHSV2_DUMB
+
+typedef struct Block {
+    uint8_t *enc;
+    uint8_t *sl_begin, *sl_end;
+    int enc_size;
+    uint8_t *data;
+    unsigned long data_size;
+
+    uint8_t start, len;
+    uint8_t dirty;
+    uint8_t col, row, width, height;
+    uint8_t flags;
+} Block;
+
+typedef struct Palette {
+    unsigned colors[128];
+    uint8_t index[1 << 15];
+} Palette;
+
+typedef struct FlashSV2Context {
+    AVCodecContext *avctx;
+    uint8_t *current_frame;
+    uint8_t *key_frame;
+    AVFrame frame;
+    uint8_t *encbuffer;
+    uint8_t *keybuffer;
+    uint8_t *databuffer;
+
+    Block *frame_blocks;
+    Block *key_blocks;
+    int frame_size;
+    int blocks_size;
+
+    int use15_7, dist, comp;
+
+    int rows, cols;
+
+    int last_key_frame;
+
+    int image_width, image_height;
+    int block_width, block_height;
+    uint8_t flags;
+    uint8_t use_custom_palette;
+    uint8_t palette_type;       ///< 0=>default, 1=>custom - changed when palette regenerated.
+    Palette palette;
+#ifndef FLASHSV2_DUMB
+    double tot_blocks;          ///< blocks encoded since last keyframe
+    double diff_blocks;         ///< blocks that were different since last keyframe
+    double tot_lines;           ///< total scanlines in image since last keyframe
+    double diff_lines;          ///< scanlines that were different since last keyframe
+    double raw_size;            ///< size of raw frames since last keyframe
+    double comp_size;           ///< size of compressed data since last keyframe
+    double uncomp_size;         ///< size of uncompressed data since last keyframe
+
+    double total_bits;          ///< total bits written to stream so far
+#endif
+} FlashSV2Context;
+
+static av_cold void cleanup(FlashSV2Context * s)
+{
+    av_freep(&s->encbuffer);
+    av_freep(&s->keybuffer);
+    av_freep(&s->databuffer);
+    av_freep(&s->current_frame);
+    av_freep(&s->key_frame);
+
+    av_freep(&s->frame_blocks);
+    av_freep(&s->key_blocks);
+}
+
+static void init_blocks(FlashSV2Context * s, Block * blocks,
+                        uint8_t * encbuf, uint8_t * databuf)
+{
+    int row, col;
+    Block *b;
+    for (col = 0; col < s->cols; col++) {
+        for (row = 0; row < s->rows; row++) {
+            b = blocks + (col + row * s->cols);
+            b->width = (col < s->cols - 1) ?
+                s->block_width :
+                s->image_width - col * s->block_width;
+
+            b->height = (row < s->rows - 1) ?
+                s->block_height :
+                s->image_height - row * s->block_height;
+
+            b->row   = row;
+            b->col   = col;
+            b->enc   = encbuf;
+            b->data  = databuf;
+            encbuf  += b->width * b->height * 3;
+            databuf += !databuf ? 0 : b->width * b->height * 6;
+        }
+    }
+}
+
+static void reset_stats(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    s->diff_blocks = 0.1;
+    s->tot_blocks = 1;
+    s->diff_lines = 0.1;
+    s->tot_lines = 1;
+    s->raw_size = s->comp_size = s->uncomp_size = 10;
+#endif
+}
+
+static av_cold int flashsv2_encode_init(AVCodecContext * avctx)
+{
+    FlashSV2Context *s = avctx->priv_data;
+
+    s->avctx = avctx;
+
+    s->comp = avctx->compression_level;
+    if (s->comp == -1)
+        s->comp = 9;
+    if (s->comp < 0 || s->comp > 9) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Compression level should be 0-9, not %d\n", s->comp);
+        return -1;
+    }
+
+
+    if ((avctx->width > 4095) || (avctx->height > 4095)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Input dimensions too large, input must be max 4096x4096 !\n");
+        return -1;
+    }
+
+    if (av_image_check_size(avctx->width, avctx->height, 0, avctx) < 0)
+        return -1;
+
+
+    s->last_key_frame = 0;
+
+    s->image_width  = avctx->width;
+    s->image_height = avctx->height;
+
+    s->block_width  = (s->image_width /  12) & ~15;
+    s->block_height = (s->image_height / 12) & ~15;
+
+    s->rows = (s->image_height + s->block_height - 1) / s->block_height;
+    s->cols = (s->image_width +  s->block_width -  1) / s->block_width;
+
+    s->frame_size  = s->image_width * s->image_height * 3;
+    s->blocks_size = s->rows * s->cols * sizeof(Block);
+
+    s->encbuffer     = av_mallocz(s->frame_size);
+    s->keybuffer     = av_mallocz(s->frame_size);
+    s->databuffer    = av_mallocz(s->frame_size * 6);
+    s->current_frame = av_mallocz(s->frame_size);
+    s->key_frame     = av_mallocz(s->frame_size);
+    s->frame_blocks  = av_mallocz(s->blocks_size);
+    s->key_blocks    = av_mallocz(s->blocks_size);
+
+    init_blocks(s, s->frame_blocks, s->encbuffer, s->databuffer);
+    init_blocks(s, s->key_blocks,   s->keybuffer, 0);
+    reset_stats(s);
+#ifndef FLASHSV2_DUMB
+    s->total_bits = 1;
+#endif
+
+    s->use_custom_palette =  0;
+    s->palette_type       = -1;        // so that the palette will be generated in reconfigure_at_keyframe
+
+    if (!s->encbuffer || !s->keybuffer || !s->databuffer
+        || !s->current_frame || !s->key_frame || !s->key_blocks
+        || !s->frame_blocks) {
+        av_log(avctx, AV_LOG_ERROR, "Memory allocation failed.\n");
+        cleanup(s);
+        return -1;
+    }
+
+    return 0;
+}
+
+static int new_key_frame(FlashSV2Context * s)
+{
+    int i;
+    memcpy(s->key_blocks, s->frame_blocks, s->blocks_size);
+    memcpy(s->key_frame, s->current_frame, s->frame_size);
+
+    for (i = 0; i < s->rows * s->cols; i++) {
+        s->key_blocks[i].enc += (s->keybuffer - s->encbuffer);
+        s->key_blocks[i].sl_begin = 0;
+        s->key_blocks[i].sl_end   = 0;
+        s->key_blocks[i].data     = 0;
+    }
+    FFSWAP(uint8_t * , s->keybuffer, s->encbuffer);
+
+    return 0;
+}
+
+static int write_palette(FlashSV2Context * s, uint8_t * buf, int buf_size)
+{
+    //this isn't implemented yet!  Default palette only!
+    return -1;
+}
+
+static int write_header(FlashSV2Context * s, uint8_t * buf, int buf_size)
+{
+    PutBitContext pb;
+    int buf_pos, len;
+
+    if (buf_size < 5)
+        return -1;
+
+    init_put_bits(&pb, buf, buf_size * 8);
+
+    put_bits(&pb, 4, (s->block_width  >> 4) - 1);
+    put_bits(&pb, 12, s->image_width);
+    put_bits(&pb, 4, (s->block_height >> 4) - 1);
+    put_bits(&pb, 12, s->image_height);
+
+    flush_put_bits(&pb);
+    buf_pos = 4;
+
+    buf[buf_pos++] = s->flags;
+
+    if (s->flags & HAS_PALLET_INFO) {
+        len = write_palette(s, buf + buf_pos, buf_size - buf_pos);
+        if (len < 0)
+            return -1;
+        buf_pos += len;
+    }
+
+    return buf_pos;
+}
+
+static int write_block(Block * b, uint8_t * buf, int buf_size)
+{
+    int buf_pos = 0;
+    unsigned block_size = b->data_size;
+
+    if (b->flags & HAS_DIFF_BLOCKS)
+        block_size += 2;
+    if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT)
+        block_size += 2;
+    if (block_size > 0)
+        block_size += 1;
+    if (buf_size < block_size + 2)
+        return -1;
+
+    buf[buf_pos++] = block_size >> 8;
+    buf[buf_pos++] = block_size;
+
+    if (block_size == 0)
+        return buf_pos;
+
+    buf[buf_pos++] = b->flags;
+
+    if (b->flags & HAS_DIFF_BLOCKS) {
+        buf[buf_pos++] = (b->start);
+        buf[buf_pos++] = (b->len);
+    }
+
+    if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT) {
+        //This feature of the format is poorly understood, and as of now, unused.
+        buf[buf_pos++] = (b->col);
+        buf[buf_pos++] = (b->row);
+    }
+
+    memcpy(buf + buf_pos, b->data, b->data_size);
+
+    buf_pos += b->data_size;
+
+    return buf_pos;
+}
+
+static int encode_zlib(Block * b, uint8_t * buf, unsigned long *buf_size, int comp)
+{
+    int res = compress2(buf, buf_size, b->sl_begin, b->sl_end - b->sl_begin, comp);
+    return res == Z_OK ? 0 : -1;
+}
+
+static int encode_zlibprime(Block * b, Block * prime, uint8_t * buf,
+                            int *buf_size, int comp)
+{
+    z_stream s;
+    int res;
+    s.zalloc = NULL;
+    s.zfree  = NULL;
+    s.opaque = NULL;
+    res = deflateInit(&s, comp);
+    if (res < 0)
+        return -1;
+
+    s.next_in  = prime->enc;
+    s.avail_in = prime->enc_size;
+    while (s.avail_in > 0) {
+        s.next_out  = buf;
+        s.avail_out = *buf_size;
+        res = deflate(&s, Z_SYNC_FLUSH);
+        if (res < 0)
+            return -1;
+    }
+
+    s.next_in   = b->sl_begin;
+    s.avail_in  = b->sl_end - b->sl_begin;
+    s.next_out  = buf;
+    s.avail_out = *buf_size;
+    res = deflate(&s, Z_FINISH);
+    deflateEnd(&s);
+    *buf_size -= s.avail_out;
+    if (res != Z_STREAM_END)
+        return -1;
+    return 0;
+}
+
+static int encode_bgr(Block * b, const uint8_t * src, int stride)
+{
+    int i;
+    uint8_t *ptr = b->enc;
+    for (i = 0; i < b->start; i++)
+        memcpy(ptr + i * b->width * 3, src + i * stride, b->width * 3);
+    b->sl_begin = ptr + i * b->width * 3;
+    for (; i < b->start + b->len; i++)
+        memcpy(ptr + i * b->width * 3, src + i * stride, b->width * 3);
+    b->sl_end = ptr + i * b->width * 3;
+    for (; i < b->height; i++)
+        memcpy(ptr + i * b->width * 3, src + i * stride, b->width * 3);
+    b->enc_size = ptr + i * b->width * 3 - b->enc;
+    return b->enc_size;
+}
+
+static inline unsigned pixel_color15(const uint8_t * src)
+{
+    return (src[0] >> 3) | ((src[1] & 0xf8) << 2) | ((src[2] & 0xf8) << 7);
+}
+
+static inline unsigned int chroma_diff(unsigned int c1, unsigned int c2)
+{
+    unsigned int t1 = (c1 & 0x000000ff) + ((c1 & 0x0000ff00) >> 8) + ((c1 & 0x00ff0000) >> 16);
+    unsigned int t2 = (c2 & 0x000000ff) + ((c2 & 0x0000ff00) >> 8) + ((c2 & 0x00ff0000) >> 16);
+
+    return abs(t1 - t2) + abs((c1 & 0x000000ff) - (c2 & 0x000000ff)) +
+        abs(((c1 & 0x0000ff00) >> 8) - ((c2 & 0x0000ff00) >> 8)) +
+        abs(((c1 & 0x00ff0000) >> 16) - ((c2 & 0x00ff0000) >> 16));
+}
+
+static inline int pixel_color7_fast(Palette * palette, unsigned c15)
+{
+    return palette->index[c15];
+}
+
+static int pixel_color7_slow(Palette * palette, unsigned color)
+{
+    int i, min = 0x7fffffff;
+    int minc = -1;
+    for (i = 0; i < 128; i++) {
+        int c1 = palette->colors[i];
+        int diff = chroma_diff(c1, color);
+        if (diff < min) {
+            min = diff;
+            minc = i;
+        }
+    }
+    return minc;
+}
+
+static inline unsigned pixel_bgr(const uint8_t * src)
+{
+    return (src[0]) | (src[1] << 8) | (src[2] << 16);
+}
+
+static int write_pixel_15_7(Palette * palette, uint8_t * dest, const uint8_t * src,
+                            int dist)
+{
+    unsigned c15 = pixel_color15(src);
+    unsigned color = pixel_bgr(src);
+    int d15 = chroma_diff(color, color & 0x00f8f8f8);
+    int c7 = pixel_color7_fast(palette, c15);
+    int d7 = chroma_diff(color, palette->colors[c7]);
+    if (dist + d15 >= d7) {
+        dest[0] = c7;
+        return 1;
+    } else {
+        dest[0] = 0x80 | (c15 >> 8);
+        dest[1] = c15 & 0xff;
+        return 2;
+    }
+}
+
+static int update_palette_index(Palette * palette)
+{
+    int r, g, b;
+    unsigned int bgr, c15, index;
+    for (r = 4; r < 256; r += 8) {
+        for (g = 4; g < 256; g += 8) {
+            for (b = 4; b < 256; b += 8) {
+                bgr = b | (g << 8) | (r << 16);
+                c15 = (b >> 3) | ((g & 0xf8) << 2) | ((r & 0xf8) << 7);
+                index = pixel_color7_slow(palette, bgr);
+
+                palette->index[c15] = index;
+            }
+        }
+    }
+    return 0;
+}
+
+static const unsigned int default_screen_video_v2_palette[128] = {
+    0x00000000, 0x00333333, 0x00666666, 0x00999999, 0x00CCCCCC, 0x00FFFFFF,
+    0x00330000, 0x00660000, 0x00990000, 0x00CC0000, 0x00FF0000, 0x00003300,
+    0x00006600, 0x00009900, 0x0000CC00, 0x0000FF00, 0x00000033, 0x00000066,
+    0x00000099, 0x000000CC, 0x000000FF, 0x00333300, 0x00666600, 0x00999900,
+    0x00CCCC00, 0x00FFFF00, 0x00003333, 0x00006666, 0x00009999, 0x0000CCCC,
+    0x0000FFFF, 0x00330033, 0x00660066, 0x00990099, 0x00CC00CC, 0x00FF00FF,
+    0x00FFFF33, 0x00FFFF66, 0x00FFFF99, 0x00FFFFCC, 0x00FF33FF, 0x00FF66FF,
+    0x00FF99FF, 0x00FFCCFF, 0x0033FFFF, 0x0066FFFF, 0x0099FFFF, 0x00CCFFFF,
+    0x00CCCC33, 0x00CCCC66, 0x00CCCC99, 0x00CCCCFF, 0x00CC33CC, 0x00CC66CC,
+    0x00CC99CC, 0x00CCFFCC, 0x0033CCCC, 0x0066CCCC, 0x0099CCCC, 0x00FFCCCC,
+    0x00999933, 0x00999966, 0x009999CC, 0x009999FF, 0x00993399, 0x00996699,
+    0x0099CC99, 0x0099FF99, 0x00339999, 0x00669999, 0x00CC9999, 0x00FF9999,
+    0x00666633, 0x00666699, 0x006666CC, 0x006666FF, 0x00663366, 0x00669966,
+    0x0066CC66, 0x0066FF66, 0x00336666, 0x00996666, 0x00CC6666, 0x00FF6666,
+    0x00333366, 0x00333399, 0x003333CC, 0x003333FF, 0x00336633, 0x00339933,
+    0x0033CC33, 0x0033FF33, 0x00663333, 0x00993333, 0x00CC3333, 0x00FF3333,
+    0x00003366, 0x00336600, 0x00660033, 0x00006633, 0x00330066, 0x00663300,
+    0x00336699, 0x00669933, 0x00993366, 0x00339966, 0x00663399, 0x00996633,
+    0x006699CC, 0x0099CC66, 0x00CC6699, 0x0066CC99, 0x009966CC, 0x00CC9966,
+    0x0099CCFF, 0x00CCFF99, 0x00FF99CC, 0x0099FFCC, 0x00CC99FF, 0x00FFCC99,
+    0x00111111, 0x00222222, 0x00444444, 0x00555555, 0x00AAAAAA, 0x00BBBBBB,
+    0x00DDDDDD, 0x00EEEEEE
+};
+
+static int generate_default_palette(Palette * palette)
+{
+    memcpy(palette->colors, default_screen_video_v2_palette,
+           sizeof(default_screen_video_v2_palette));
+
+    return update_palette_index(palette);
+}
+
+static int generate_optimum_palette(Palette * palette, const uint8_t * image,
+                                   int width, int height, int stride)
+{
+    //this isn't implemented yet!  Default palette only!
+    return -1;
+}
+
+static inline int encode_15_7_sl(Palette * palette, uint8_t * dest,
+                                 const uint8_t * src, int width, int dist)
+{
+    int len = 0, x;
+    for (x = 0; x < width; x++) {
+        len += write_pixel_15_7(palette, dest + len, src + 3 * x, dist);
+    }
+    return len;
+}
+
+static int encode_15_7(Palette * palette, Block * b, const uint8_t * src,
+                       int stride, int dist)
+{
+    int i;
+    uint8_t *ptr = b->enc;
+    for (i = 0; i < b->start; i++)
+        ptr += encode_15_7_sl(palette, ptr, src + i * stride, b->width, dist);
+    b->sl_begin = ptr;
+    for (; i < b->start + b->len; i++)
+        ptr += encode_15_7_sl(palette, ptr, src + i * stride, b->width, dist);
+    b->sl_end = ptr;
+    for (; i < b->height; i++)
+        ptr += encode_15_7_sl(palette, ptr, src + i * stride, b->width, dist);
+    b->enc_size = ptr - b->enc;
+    return b->enc_size;
+}
+
+static int encode_block(Palette * palette, Block * b, Block * prev,
+                        const uint8_t * src, int stride, int comp, int dist,
+                        int keyframe)
+{
+    unsigned buf_size = b->width * b->height * 6;
+    uint8_t buf[buf_size];
+    int res;
+    if (b->flags & COLORSPACE_15_7) {
+        encode_15_7(palette, b, src, stride, dist);
+    } else {
+        encode_bgr(b, src, stride);
+    }
+
+    if (b->len > 0) {
+        b->data_size = buf_size;
+        res = encode_zlib(b, b->data, &b->data_size, comp);
+        if (res)
+            return res;
+
+        if (!keyframe) {
+            res = encode_zlibprime(b, prev, buf, &buf_size, comp);
+            if (res)
+                return res;
+
+            if (buf_size < b->data_size) {
+                b->data_size = buf_size;
+                memcpy(b->data, buf, buf_size);
+                b->flags |= ZLIB_PRIME_COMPRESS_PREVIOUS;
+            }
+        }
+    } else {
+        b->data_size = 0;
+    }
+    return 0;
+}
+
+static int compare_sl(FlashSV2Context * s, Block * b, const uint8_t * src,
+                      uint8_t * frame, uint8_t * key, int y, int keyframe)
+{
+    if (memcmp(src, frame, b->width * 3) != 0) {
+        b->dirty = 1;
+        memcpy(frame, src, b->width * 3);
+#ifndef FLASHSV2_DUMB
+        s->diff_lines++;
+#endif
+    }
+    if (memcmp(src, key, b->width * 3) != 0) {
+        if (b->len == 0)
+            b->start = y;
+        b->len = y + 1 - b->start;
+    }
+    return 0;
+}
+
+static int mark_all_blocks(FlashSV2Context * s, const uint8_t * src, int stride,
+                           int keyframe)
+{
+    int sl, rsl, col, pos, possl;
+    Block *b;
+    for (sl = s->image_height - 1; sl >= 0; sl--) {
+        for (col = 0; col < s->cols; col++) {
+            rsl = s->image_height - sl - 1;
+            b = s->frame_blocks + col + rsl / s->block_height * s->cols;
+            possl = stride * sl + col * s->block_width * 3;
+            pos = s->image_width * rsl * 3 + col * s->block_width * 3;
+            compare_sl(s, b, src + possl, s->current_frame + pos,
+                       s->key_frame + pos, rsl % s->block_height, keyframe);
+        }
+    }
+#ifndef FLASHSV2_DUMB
+    s->tot_lines += s->image_height * s->cols;
+#endif
+    return 0;
+}
+
+static int encode_all_blocks(FlashSV2Context * s, int keyframe)
+{
+    int row, col, res;
+    uint8_t *data;
+    Block *b, *prev;
+    for (row = 0; row < s->rows; row++) {
+        for (col = 0; col < s->cols; col++) {
+            b = s->frame_blocks + (row * s->cols + col);
+            prev = s->key_blocks + (row * s->cols + col);
+            if (keyframe) {
+                b->start = 0;
+                b->len = b->height;
+                b->flags = s->use15_7 ? COLORSPACE_15_7 : 0;
+            } else if (!b->dirty) {
+                b->start = 0;
+                b->len = 0;
+                b->data_size = 0;
+                b->flags = s->use15_7 ? COLORSPACE_15_7 : 0;
+                continue;
+            } else {
+                b->flags = s->use15_7 ? COLORSPACE_15_7 | HAS_DIFF_BLOCKS : HAS_DIFF_BLOCKS;
+            }
+            data = s->current_frame + s->image_width * 3 * s->block_height * row + s->block_width * col * 3;
+            res = encode_block(&s->palette, b, prev, data, s->image_width * 3, s->comp, s->dist, keyframe);
+#ifndef FLASHSV2_DUMB
+            if (b->dirty)
+                s->diff_blocks++;
+            s->comp_size += b->data_size;
+            s->uncomp_size += b->enc_size;
+#endif
+            if (res)
+                return res;
+        }
+    }
+#ifndef FLASHSV2_DUMB
+    s->raw_size += s->image_width * s->image_height * 3;
+    s->tot_blocks += s->rows * s->cols;
+#endif
+    return 0;
+}
+
+static int write_all_blocks(FlashSV2Context * s, uint8_t * buf,
+                            int buf_size)
+{
+    int row, col, buf_pos = 0, len;
+    Block *b;
+    for (row = 0; row < s->rows; row++) {
+        for (col = 0; col < s->cols; col++) {
+            b = s->frame_blocks + row * s->cols + col;
+            len = write_block(b, buf + buf_pos, buf_size - buf_pos);
+            b->start = b->len = b->dirty = 0;
+            if (len < 0)
+                return len;
+            buf_pos += len;
+        }
+    }
+    return buf_pos;
+}
+
+static int write_bitstream(FlashSV2Context * s, const uint8_t * src, int stride,
+                           uint8_t * buf, int buf_size, int keyframe)
+{
+    int buf_pos, res;
+
+    res = mark_all_blocks(s, src, stride, keyframe);
+    if (res)
+        return res;
+    res = encode_all_blocks(s, keyframe);
+    if (res)
+        return res;
+
+    res = write_header(s, buf, buf_size);
+    if (res < 0) {
+        return res;
+    } else {
+        buf_pos = res;
+    }
+    res = write_all_blocks(s, buf + buf_pos, buf_size - buf_pos);
+    if (res < 0)
+        return res;
+    buf_pos += res;
+#ifndef FLASHSV2_DUMB
+    s->total_bits += ((double) buf_pos) * 8.0;
+#endif
+
+    return buf_pos;
+}
+
+static void recommend_keyframe(FlashSV2Context * s, int *keyframe)
+{
+#ifndef FLASHSV2_DUMB
+    double block_ratio, line_ratio, enc_ratio, comp_ratio, data_ratio;
+    if (s->avctx->gop_size > 0) {
+        block_ratio = s->diff_blocks / s->tot_blocks;
+        line_ratio = s->diff_lines / s->tot_lines;
+        enc_ratio = s->uncomp_size / s->raw_size;
+        comp_ratio = s->comp_size / s->uncomp_size;
+        data_ratio = s->comp_size / s->raw_size;
+
+        if ((block_ratio >= 0.5 && line_ratio / block_ratio <= 0.5) || line_ratio >= 0.95) {
+            *keyframe = 1;
+            return;
+        }
+    }
+#else
+    return;
+#endif
+}
+
+static const double block_size_fraction = 1.0 / 300;
+static int optimum_block_width(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double save = (1-pow(s->diff_lines/s->diff_blocks/s->block_height, 0.5)) * s->comp_size/s->tot_blocks;
+    double width = block_size_fraction * sqrt(0.5 * save * s->rows * s->cols) * s->image_width;
+    int pwidth = ((int) width);
+    return FFCLIP(pwidth & ~15, 256, 16);
+#else
+    return 64;
+#endif
+}
+
+static int optimum_block_height(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double save = (1-pow(s->diff_lines/s->diff_blocks/s->block_height, 0.5)) * s->comp_size/s->tot_blocks;
+    double height = block_size_fraction * sqrt(0.5 * save * s->rows * s->cols) * s->image_height;
+    int pheight = ((int) height);
+    return FFCLIP(pheight & ~15, 256, 16);
+#else
+    return 64;
+#endif
+}
+
+static const double use15_7_threshold = 8192;
+
+static int optimum_use15_7(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double ideal = ((double)(s->avctx->bit_rate * s->avctx->time_base.den * s->avctx->ticks_per_frame)) /
+        ((double) s->avctx->time_base.num) * s->avctx->frame_number;
+    if (ideal + use15_7_threshold < s->total_bits) {
+        return 1;
+    } else {
+        return 0;
+    }
+#else
+    return s->avctx->global_quality == 0;
+#endif
+}
+
+static const double color15_7_factor = 100;
+
+static int optimum_dist(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double ideal =
+        s->avctx->bit_rate * s->avctx->time_base.den *
+        s->avctx->ticks_per_frame;
+    int dist = pow((s->total_bits / ideal) * color15_7_factor, 3);
+    av_log(s->avctx, AV_LOG_DEBUG, "dist: %d\n", dist);
+    return dist;
+#else
+    return 15;
+#endif
+}
+
+
+static int reconfigure_at_keyframe(FlashSV2Context * s, const uint8_t * image,
+                                   int stride)
+{
+    int update_palette = 0;
+    int res;
+    s->block_width = optimum_block_width(s);
+    s->block_height = optimum_block_height(s);
+
+    s->rows = (s->image_height + s->block_height - 1) / s->block_height;
+    s->cols = (s->image_width +  s->block_width -  1) / s->block_width;
+
+    if (s->rows * s->cols != s->blocks_size / sizeof(Block)) {
+        if (s->rows * s->cols > s->blocks_size / sizeof(Block)) {
+            s->frame_blocks = av_realloc(s->frame_blocks, s->rows * s->cols * sizeof(Block));
+            s->key_blocks = av_realloc(s->key_blocks, s->cols * s->rows * sizeof(Block));
+            if (!s->frame_blocks || !s->key_blocks) {
+                av_log(s->avctx, AV_LOG_ERROR, "Memory allocation failed.\n");
+                return -1;
+            }
+            s->blocks_size = s->rows * s->cols * sizeof(Block);
+        }
+        init_blocks(s, s->frame_blocks, s->encbuffer, s->databuffer);
+        init_blocks(s, s->key_blocks, s->keybuffer, 0);
+
+    }
+
+    s->use15_7 = optimum_use15_7(s);
+    if (s->use15_7) {
+        if ((s->use_custom_palette && s->palette_type != 1) || update_palette) {
+            res = generate_optimum_palette(&s->palette, image, s->image_width, s->image_height, stride);
+            if (res)
+                return res;
+            s->palette_type = 1;
+            av_log(s->avctx, AV_LOG_DEBUG, "Generated optimum palette\n");
+        } else if (!s->use_custom_palette && s->palette_type != 0) {
+            res = generate_default_palette(&s->palette);
+            if (res)
+                return res;
+            s->palette_type = 0;
+            av_log(s->avctx, AV_LOG_DEBUG, "Generated default palette\n");
+        }
+    }
+
+
+    reset_stats(s);
+
+    return 0;
+}
+
+static int flashsv2_encode_frame(AVCodecContext * avctx, uint8_t * buf,
+                                 int buf_size, void *data)
+{
+    FlashSV2Context *const s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame *const p = &s->frame;
+    int res;
+    int keyframe = 0;
+
+    *p = *pict;
+
+    /* First frame needs to be a keyframe */
+    if (avctx->frame_number == 0)
+        keyframe = 1;
+
+    /* Check the placement of keyframes */
+    if (avctx->gop_size > 0) {
+        if (avctx->frame_number >= s->last_key_frame + avctx->gop_size)
+            keyframe = 1;
+    }
+
+    if (buf_size < s->frame_size) {
+        //Conservative upper bound check for compressed data
+        av_log(avctx, AV_LOG_ERROR, "buf_size %d <  %d\n", buf_size, s->frame_size);
+        return -1;
+    }
+
+    if (!keyframe
+        && avctx->frame_number > s->last_key_frame + avctx->keyint_min) {
+        recommend_keyframe(s, &keyframe);
+        if (keyframe)
+            av_log(avctx, AV_LOG_DEBUG, "Recommending key frame at frame %d\n", avctx->frame_number);
+    }
+
+    if (keyframe) {
+        res = reconfigure_at_keyframe(s, p->data[0], p->linesize[0]);
+        if (res)
+            return res;
+    }
+
+    if (s->use15_7)
+        s->dist = optimum_dist(s);
+
+    res = write_bitstream(s, p->data[0], p->linesize[0], buf, buf_size, keyframe);
+
+    if (keyframe) {
+        new_key_frame(s);
+        p->pict_type = FF_I_TYPE;
+        p->key_frame = 1;
+        s->last_key_frame = avctx->frame_number;
+        av_log(avctx, AV_LOG_DEBUG, "Inserting key frame at frame %d\n", avctx->frame_number);
+    } else {
+        p->pict_type = FF_P_TYPE;
+        p->key_frame = 0;
+    }
+
+    avctx->coded_frame = p;
+
+    return res;
+}
+
+static av_cold int flashsv2_encode_end(AVCodecContext * avctx)
+{
+    FlashSV2Context *s = avctx->priv_data;
+
+    cleanup(s);
+
+    return 0;
+}
+
+AVCodec ff_flashsv2_encoder = {
+    "flashsv2",
+    AVMEDIA_TYPE_VIDEO,
+    CODEC_ID_FLASHSV2,
+    sizeof(FlashSV2Context),
+    flashsv2_encode_init,
+    flashsv2_encode_frame,
+    flashsv2_encode_end,
+    .pix_fmts = (enum PixelFormat[]) {PIX_FMT_BGR24, PIX_FMT_NONE},
+    .long_name = NULL_IF_CONFIG_SMALL("Flash Screen Video Version 2"),
+    .capabilities   =  CODEC_CAP_EXPERIMENTAL,
+};
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 4151f4b012..78e432aaf5 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -567,14 +567,6 @@ retry:
                s->workaround_bugs, s->lavc_build, s->xvid_build, s->divx_version, s->divx_build,
                s->divx_packed ? "p" : "");
 
-#if 0 // dump bits per frame / qp / complexity
-{
-    static FILE *f=NULL;
-    if(!f) f=fopen("rate_qp_cplx.txt", "w");
-    fprintf(f, "%d %d %f\n", buf_size, s->qscale, buf_size*(double)s->qscale);
-}
-#endif
-
 #if HAVE_MMX
     if (s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (av_get_cpu_flags() & AV_CPU_FLAG_MMX)) {
         avctx->idct_algo= FF_IDCT_XVIDMMX;
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 22a57866d4..8e3ec336aa 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -153,10 +153,6 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_l
     h->nal_unit_type= src[0]&0x1F;
 
     src++; length--;
-#if 0
-    for(i=0; i<length; i++)
-        printf("%2X ", src[i]);
-#endif
 
 #if HAVE_FAST_UNALIGNED
 # if HAVE_FAST_64BIT
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index f81a042c7f..6895968426 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -352,11 +352,6 @@ static int read_huffman_tables(HYuvContext *s, const uint8_t *src, int length){
         if(generate_bits_table(s->bits[i], s->len[i])<0){
             return -1;
         }
-#if 0
-for(j=0; j<256; j++){
-printf("%6X, %2d,  %3d\n", s->bits[i][j], s->len[i][j], j);
-}
-#endif
         free_vlc(&s->vlc[i]);
         init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
     }
diff --git a/libavcodec/indeo2.c b/libavcodec/indeo2.c
index 68c6246f89..c4d410faf5 100644
--- a/libavcodec/indeo2.c
+++ b/libavcodec/indeo2.c
@@ -146,9 +146,6 @@ static int ir2_decode_frame(AVCodecContext *avctx,
     AVFrame * const p= (AVFrame*)&s->picture;
     int start;
 
-    if(p->data[0])
-        avctx->release_buffer(avctx, p);
-
     p->reference = 1;
     p->buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
     if (avctx->reget_buffer(avctx, p)) {
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 1b4e88b75b..0dd954d710 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -41,6 +41,7 @@ typedef struct X264Context {
     const char *level;
     int fastfirstpass;
     const char *stats;
+    const char *weightp;
 } X264Context;
 
 static void X264_log(void *p, int level, const char *fmt, va_list args)
@@ -252,7 +253,6 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.analyse.i_direct_mv_pred  = avctx->directpred;
 
     x4->params.analyse.b_weighted_bipred = avctx->flags2 & CODEC_FLAG2_WPRED;
-    x4->params.analyse.i_weighted_pred = avctx->weighted_p_pred;
 
     if (avctx->me_method == ME_EPZS)
         x4->params.analyse.i_me_method = X264_ME_DIA;
@@ -302,6 +302,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.p_log_private        = avctx;
     x4->params.i_log_level          = X264_LOG_DEBUG;
 
+    OPT_STR("weightp", x4->weightp);
+
     x4->params.b_intra_refresh      = avctx->flags2 & CODEC_FLAG2_INTRA_REFRESH;
     x4->params.rc.i_bitrate         = avctx->bit_rate       / 1000;
     x4->params.rc.i_vbv_buffer_size = avctx->rc_buffer_size / 1000;
@@ -404,6 +406,7 @@ static const AVOption options[] = {
     {"profile", "Set profile restrictions", OFFSET(profile), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
     {"level", "Specify level (as defined by Annex A)", OFFSET(level), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
     {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
+    {"wpredp", "Weighted prediction for P-frames", OFFSET(weightp), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
     { NULL },
 };
 
diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c
index 15b588c793..db477a0ab6 100644
--- a/libavcodec/mjpegenc.c
+++ b/libavcodec/mjpegenc.c
@@ -156,7 +156,7 @@ static void jpeg_put_comments(MpegEncContext *s)
     int size;
     uint8_t *ptr;
 
-    if (s->aspect_ratio_info /* && !lossless */)
+    if (s->avctx->sample_aspect_ratio.num /* && !lossless */)
     {
     /* JFIF header */
     put_marker(p, APP0);
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 50078f93cf..62fe2d47aa 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -1119,10 +1119,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
 //    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
     c->mc_mb_var_sum_temp += (vard+128)>>8;
 
-#if 0
-    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
-           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
-#endif
+    av_dlog(s, "varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
+            varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
     if(mb_type){
         int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
         int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index 3a37bde063..87cd5be404 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -158,6 +158,7 @@ static int hpel_motion_search(MpegEncContext * s,
         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
 
+#if 1
         int key;
         int map_generation= c->map_generation;
 #ifndef NDEBUG
@@ -171,6 +172,7 @@ static int hpel_motion_search(MpegEncContext * s,
         assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
         key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
         assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
+#endif
         if(t<=b){
             CHECK_HALF_MV(0, 1, mx  ,my-1)
             if(l<=r){
@@ -632,25 +634,6 @@ static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
 
         if(x!=best[0] || y!=best[1])
             dia_size=0;
-#if 0
-{
-int dx, dy, i;
-static int stats[8*8];
-dx= FFABS(x-best[0]);
-dy= FFABS(y-best[1]);
-if(dy>dx){
-    dx^=dy; dy^=dx; dx^=dy;
-}
-stats[dy*8 + dx] ++;
-if(256*256*256*64 % (stats[0]+1)==0){
-    for(i=0; i<64; i++){
-        if((i&7)==0) printf("\n");
-        printf("%8d ", stats[i]);
-    }
-    printf("\n");
-}
-}
-#endif
     }
     return dmin;
 }
@@ -983,22 +966,6 @@ static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
 
         if(x!=best[0] || y!=best[1])
             dia_size=0;
-#if 0
-{
-int dx, dy, i;
-static int stats[8*8];
-dx= FFABS(x-best[0]);
-dy= FFABS(y-best[1]);
-stats[dy*8 + dx] ++;
-if(256*256*256*64 % (stats[0]+1)==0){
-    for(i=0; i<64; i++){
-        if((i&7)==0) printf("\n");
-        printf("%6d ", stats[i]);
-    }
-    printf("\n");
-}
-}
-#endif
     }
     return dmin;
 }
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 192ecdd478..319a513860 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -2574,6 +2574,7 @@ static const AVProfile mpeg2_video_profiles[] = {
     { FF_PROFILE_MPEG2_SIMPLE,       "Simple"             },
     { FF_PROFILE_RESERVED,           "Reserved"           },
     { FF_PROFILE_RESERVED,           "Reserved"           },
+    { FF_PROFILE_UNKNOWN },
 };
 
 
diff --git a/libavcodec/mpeg12enc.c b/libavcodec/mpeg12enc.c
index cc1d9c8230..ce5f29df59 100644
--- a/libavcodec/mpeg12enc.c
+++ b/libavcodec/mpeg12enc.c
@@ -888,10 +888,6 @@ static void mpeg1_encode_block(MpegEncContext *s,
         j = s->intra_scantable.permutated[i];
         level = block[j];
     next_coef:
-#if 0
-        if (level != 0)
-            av_dlog(s->avctx, "level[%d]=%d\n", i, level);
-#endif
         /* encode using VLC */
         if (level != 0) {
             run = i - last_non_zero - 1;
diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c
index eef40d0fa1..de2a336e34 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc.c
@@ -591,13 +591,6 @@ static void compute_bit_allocation(MpegAudioContext *s,
     }
     *padding = max_frame_size - current_frame_size;
     assert(*padding >= 0);
-
-#if 0
-    for(i=0;i<s->sblimit;i++) {
-        printf("%d ", bit_alloc[i]);
-    }
-    printf("\n");
-#endif
 }
 
 /*
@@ -719,15 +712,7 @@ static void encode_frame(MpegAudioContext *s,
                             /* group the 3 values to save bits */
                             put_bits(p, -bits,
                                      q[0] + steps * (q[1] + steps * q[2]));
-#if 0
-                            printf("%d: gr1 %d\n",
-                                   i, q[0] + steps * (q[1] + steps * q[2]));
-#endif
                         } else {
-#if 0
-                            printf("%d: gr3 %d %d %d\n",
-                                   i, q[0], q[1], q[2]);
-#endif
                             put_bits(p, bits, q[0]);
                             put_bits(p, bits, q[1]);
                             put_bits(p, bits, q[2]);
diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index 9cbe2b8bef..1ba3cdb2fa 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -617,10 +617,6 @@ void msmpeg4_encode_mb(MpegEncContext * s,
             }
             coded_cbp |= val << (5 - i);
         }
-#if 0
-        if (coded_cbp)
-            printf("cbp=%x %x\n", cbp, coded_cbp);
-#endif
 
         if(s->msmpeg4_version<=2){
             if (s->pict_type == FF_I_TYPE) {
@@ -1383,17 +1379,6 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
 {
     int code;
 
-#if 0
-{
-int i;
-for(i=0; i<s->gb.size_in_bits; i++)
-    av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb));
-//    get_bits1(&s->gb);
-av_log(s->avctx, AV_LOG_DEBUG, "END\n");
-return -1;
-}
-#endif
-
     if(s->msmpeg4_version==1){
         int start_code = get_bits_long(&s->gb, 32);
         if(start_code!=0x00000100){
diff --git a/libavcodec/options.c b/libavcodec/options.c
index fae1164c9b..04c58a64df 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -126,7 +126,6 @@ static const AVOption options[]={
 {"b_qfactor", "qp factor between p and b frames", OFFSET(b_quant_factor), FF_OPT_TYPE_FLOAT, 1.25, -FLT_MAX, FLT_MAX, V|E},
 {"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
-{"wpredp", "weighted prediction analysis method", OFFSET(weighted_p_pred), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
 #if FF_API_HURRY_UP
 {"hurry_up", "deprecated, use skip_idct/skip_frame instead", OFFSET(hurry_up), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
 #endif
diff --git a/libavcodec/parser.c b/libavcodec/parser.c
index 34659b7b2f..1000a0bd34 100644
--- a/libavcodec/parser.c
+++ b/libavcodec/parser.c
@@ -309,12 +309,11 @@ int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_s
         pc->overread++;
     }
 
-#if 0
     if(pc->overread){
-        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
-        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+        av_dlog(pc, "overread %d, state:%X next:%d index:%d o_index:%d\n",
+                pc->overread, pc->state, next, pc->index, pc->overread_index);
+        av_dlog(pc, "%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
     }
-#endif
 
     return 0;
 }
diff --git a/libavcodec/png.h b/libavcodec/png.h
index bab5224851..d6fac3e673 100644
--- a/libavcodec/png.h
+++ b/libavcodec/png.h
@@ -23,6 +23,9 @@
 #define AVCODEC_PNG_H
 
 #include <stdint.h>
+#include <zlib.h>
+
+#include "avcodec.h"
 
 #define PNG_COLOR_MASK_PALETTE    1
 #define PNG_COLOR_MASK_COLOR      2
@@ -69,4 +72,41 @@ int ff_png_pass_row_size(int pass, int bits_per_pixel, int width);
 
 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
 
+typedef struct PNGDecContext {
+    const uint8_t *bytestream;
+    const uint8_t *bytestream_start;
+    const uint8_t *bytestream_end;
+    AVFrame picture1, picture2;
+    AVFrame *current_picture, *last_picture;
+
+    int state;
+    int width, height;
+    int bit_depth;
+    int color_type;
+    int compression_type;
+    int interlace_type;
+    int filter_type;
+    int channels;
+    int bits_per_pixel;
+    int bpp;
+
+    uint8_t *image_buf;
+    int image_linesize;
+    uint32_t palette[256];
+    uint8_t *crow_buf;
+    uint8_t *last_row;
+    uint8_t *tmp_row;
+    int pass;
+    int crow_size; /* compressed row size (include filter type) */
+    int row_size; /* decompressed row size */
+    int pass_row_size; /* decompress row size of the current pass */
+    int y;
+    z_stream zstream;
+
+    void (*add_bytes_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w);
+    void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+} PNGDecContext;
+
+void ff_png_init_mmx(PNGDecContext *s);
+
 #endif /* AVCODEC_PNG_H */
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 2f9b343e5b..1199bd5412 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -18,11 +18,13 @@
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
+//#define DEBUG
+
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "png.h"
-#include "dsputil.h"
 
 /* TODO:
  * - add 2, 4 and 16 bit depth support
@@ -30,42 +32,6 @@
 
 #include <zlib.h>
 
-//#define DEBUG
-
-typedef struct PNGDecContext {
-    DSPContext dsp;
-
-    const uint8_t *bytestream;
-    const uint8_t *bytestream_start;
-    const uint8_t *bytestream_end;
-    AVFrame picture1, picture2;
-    AVFrame *current_picture, *last_picture;
-
-    int state;
-    int width, height;
-    int bit_depth;
-    int color_type;
-    int compression_type;
-    int interlace_type;
-    int filter_type;
-    int channels;
-    int bits_per_pixel;
-    int bpp;
-
-    uint8_t *image_buf;
-    int image_linesize;
-    uint32_t palette[256];
-    uint8_t *crow_buf;
-    uint8_t *last_row;
-    uint8_t *tmp_row;
-    int pass;
-    int crow_size; /* compressed row size (include filter type) */
-    int row_size; /* decompressed row size */
-    int pass_row_size; /* decompress row size of the current pass */
-    int y;
-    z_stream zstream;
-} PNGDecContext;
-
 /* Mask to determine which y pixels can be written in a pass */
 static const uint8_t png_pass_dsp_ymask[NB_PASSES] = {
     0xff, 0xff, 0x0f, 0xcc, 0x33, 0xff, 0x55,
@@ -134,7 +100,23 @@ static void png_put_interlaced_row(uint8_t *dst, int width,
     }
 }
 
-void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)
+// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
+#define pb_7f (~0UL/255 * 0x7f)
+#define pb_80 (~0UL/255 * 0x80)
+
+static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
+{
+    long i;
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i] = src1[i]+src2[i];
+}
+
+static void add_paeth_prediction_c(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)
 {
     int i;
     for(i = 0; i < w; i++) {
@@ -191,7 +173,7 @@ void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w
     }
 
 /* NOTE: 'dst' can be equal to 'last' */
-static void png_filter_row(DSPContext *dsp, uint8_t *dst, int filter_type,
+static void png_filter_row(PNGDecContext *s, uint8_t *dst, int filter_type,
                            uint8_t *src, uint8_t *last, int size, int bpp)
 {
     int i, p, r, g, b, a;
@@ -217,7 +199,7 @@ static void png_filter_row(DSPContext *dsp, uint8_t *dst, int filter_type,
         }
         break;
     case PNG_FILTER_VALUE_UP:
-        dsp->add_bytes_l2(dst, src, last, size);
+        s->add_bytes_l2(dst, src, last, size);
         break;
     case PNG_FILTER_VALUE_AVG:
         for(i = 0; i < bpp; i++) {
@@ -235,10 +217,10 @@ static void png_filter_row(DSPContext *dsp, uint8_t *dst, int filter_type,
         if(bpp > 1 && size > 4) {
             // would write off the end of the array if we let it process the last pixel with bpp=3
             int w = bpp==4 ? size : size-3;
-            dsp->add_png_paeth_prediction(dst+i, src+i, last+i, w-i, bpp);
+            s->add_paeth_prediction(dst+i, src+i, last+i, w-i, bpp);
             i = w;
         }
-        ff_add_png_paeth_prediction(dst+i, src+i, last+i, size-i, bpp);
+        add_paeth_prediction_c(dst+i, src+i, last+i, size-i, bpp);
         break;
     }
 }
@@ -291,7 +273,7 @@ static void png_handle_row(PNGDecContext *s)
         ptr = s->image_buf + s->image_linesize * s->y;
         /* need to swap bytes correctly for RGB_ALPHA */
         if (s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-            png_filter_row(&s->dsp, s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
+            png_filter_row(s, s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
                            s->last_row, s->row_size, s->bpp);
             convert_to_rgb32(ptr, s->tmp_row, s->width, s->filter_type == PNG_FILTER_TYPE_LOCO);
             FFSWAP(uint8_t*, s->last_row, s->tmp_row);
@@ -302,7 +284,7 @@ static void png_handle_row(PNGDecContext *s)
             else
                 last_row = ptr - s->image_linesize;
 
-            png_filter_row(&s->dsp, ptr, s->crow_buf[0], s->crow_buf + 1,
+            png_filter_row(s, ptr, s->crow_buf[0], s->crow_buf + 1,
                            last_row, s->row_size, s->bpp);
         }
         /* loco lags by 1 row so that it doesn't interfere with top prediction */
@@ -325,7 +307,7 @@ static void png_handle_row(PNGDecContext *s)
                    wait for the next one */
                 if (got_line)
                     break;
-                png_filter_row(&s->dsp, s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
+                png_filter_row(s, s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
                                s->last_row, s->pass_row_size, s->bpp);
                 FFSWAP(uint8_t*, s->last_row, s->tmp_row);
                 got_line = 1;
@@ -491,7 +473,7 @@ static int decode_frame(AVCodecContext *avctx,
                 } else if (s->color_type == PNG_COLOR_TYPE_PALETTE) {
                     avctx->pix_fmt = PIX_FMT_PAL8;
                 } else if (s->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
-                    avctx->pix_fmt = PIX_FMT_Y400A;
+                    avctx->pix_fmt = PIX_FMT_GRAY8A;
                 } else {
                     goto fail;
                 }
@@ -633,14 +615,23 @@ static int decode_frame(AVCodecContext *avctx,
     goto the_end;
 }
 
-static av_cold int png_dec_init(AVCodecContext *avctx){
+static av_cold int png_dec_init(AVCodecContext *avctx)
+{
     PNGDecContext *s = avctx->priv_data;
 
     s->current_picture = &s->picture1;
     s->last_picture = &s->picture2;
     avcodec_get_frame_defaults(&s->picture1);
     avcodec_get_frame_defaults(&s->picture2);
-    dsputil_init(&s->dsp, avctx);
+
+#if HAVE_MMX
+    ff_png_init_mmx(s);
+#endif
+
+    if (!s->add_paeth_prediction)
+        s->add_paeth_prediction = add_paeth_prediction_c;
+    if (!s->add_bytes_l2)
+        s->add_bytes_l2 = add_bytes_l2_c;
 
     return 0;
 }
@@ -668,6 +659,5 @@ AVCodec ff_png_decoder = {
     decode_frame,
     CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
     NULL,
-    .max_lowres = 5,
     .long_name = NULL_IF_CONFIG_SMALL("PNG image"),
 };
diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c
index 39faab7be2..c746db6cc2 100644
--- a/libavcodec/pnmdec.c
+++ b/libavcodec/pnmdec.c
@@ -199,7 +199,6 @@ AVCodec ff_pgm_decoder = {
     pnm_decode_frame,
     CODEC_CAP_DR1,
     .pix_fmts  = (const enum PixelFormat[]){PIX_FMT_GRAY8, PIX_FMT_GRAY16BE, PIX_FMT_NONE},
-    .max_lowres = 5,
     .long_name = NULL_IF_CONFIG_SMALL("PGM (Portable GrayMap) image"),
 };
 #endif
@@ -216,7 +215,6 @@ AVCodec ff_pgmyuv_decoder = {
     pnm_decode_frame,
     CODEC_CAP_DR1,
     .pix_fmts  = (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
-    .max_lowres = 5,
     .long_name = NULL_IF_CONFIG_SMALL("PGMYUV (Portable GrayMap YUV) image"),
 };
 #endif
@@ -233,7 +231,6 @@ AVCodec ff_ppm_decoder = {
     pnm_decode_frame,
     CODEC_CAP_DR1,
     .pix_fmts  = (const enum PixelFormat[]){PIX_FMT_RGB24, PIX_FMT_RGB48BE, PIX_FMT_NONE},
-    .max_lowres = 5,
     .long_name = NULL_IF_CONFIG_SMALL("PPM (Portable PixelMap) image"),
 };
 #endif
@@ -250,7 +247,6 @@ AVCodec ff_pbm_decoder = {
     pnm_decode_frame,
     CODEC_CAP_DR1,
     .pix_fmts  = (const enum PixelFormat[]){PIX_FMT_MONOWHITE, PIX_FMT_NONE},
-    .max_lowres = 5,
     .long_name = NULL_IF_CONFIG_SMALL("PBM (Portable BitMap) image"),
 };
 #endif
@@ -267,7 +263,6 @@ AVCodec ff_pam_decoder = {
     pnm_decode_frame,
     CODEC_CAP_DR1,
     .pix_fmts  = (const enum PixelFormat[]){PIX_FMT_RGB24, PIX_FMT_RGB32, PIX_FMT_GRAY8, PIX_FMT_MONOWHITE, PIX_FMT_NONE},
-    .max_lowres = 5,
     .long_name = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"),
 };
 #endif
diff --git a/libavcodec/qpeg.c b/libavcodec/qpeg.c
index e4c2291f39..497d113683 100644
--- a/libavcodec/qpeg.c
+++ b/libavcodec/qpeg.c
@@ -28,8 +28,8 @@
 
 typedef struct QpegContext{
     AVCodecContext *avctx;
-    AVFrame pic;
-    uint8_t *refdata;
+    AVFrame pic, ref;
+    uint32_t pal[256];
 } QpegContext;
 
 static void qpeg_decode_intra(const uint8_t *src, uint8_t *dst, int size,
@@ -123,9 +123,12 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size,
     int filled = 0;
     int orig_height;
 
+    if(!refdata)
+        refdata= dst;
+
     /* copy prev frame */
     for(i = 0; i < height; i++)
-        memcpy(refdata + (i * width), dst + (i * stride), width);
+        memcpy(dst + (i * stride), refdata + (i * stride), width);
 
     orig_height = height;
     height--;
@@ -171,10 +174,10 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size,
                                me_x, me_y, me_w, me_h, filled, height);
                     else {
                         /* do motion compensation */
-                        me_plane = refdata + (filled + me_x) + (height - me_y) * width;
+                        me_plane = refdata + (filled + me_x) + (height - me_y) * stride;
                         for(j = 0; j < me_h; j++) {
                             for(i = 0; i < me_w; i++)
-                                dst[filled + i - (j * stride)] = me_plane[i - (j * width)];
+                                dst[filled + i - (j * stride)] = me_plane[i - (j * stride)];
                         }
                     }
                 }
@@ -253,14 +256,16 @@ static int decode_frame(AVCodecContext *avctx,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     QpegContext * const a = avctx->priv_data;
-    AVFrame * const p= (AVFrame*)&a->pic;
+    AVFrame * p= (AVFrame*)&a->pic;
+    AVFrame * ref= (AVFrame*)&a->ref;
     uint8_t* outdata;
     int delta;
 
-    if(p->data[0])
-        avctx->release_buffer(avctx, p);
+    if(ref->data[0])
+        avctx->release_buffer(avctx, ref);
+    FFSWAP(AVFrame, *ref, *p);
 
-    p->reference= 0;
+    p->reference= 3;
     if(avctx->get_buffer(avctx, p) < 0){
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return -1;
@@ -270,7 +275,7 @@ static int decode_frame(AVCodecContext *avctx,
         qpeg_decode_intra(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height);
     } else {
         delta = buf[0x85];
-        qpeg_decode_inter(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height, delta, buf + 4, a->refdata);
+        qpeg_decode_inter(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height, delta, buf + 4, a->ref.data[0]);
     }
 
     /* make the palette available on the way out */
@@ -295,7 +300,6 @@ static av_cold int decode_init(AVCodecContext *avctx){
     }
     a->avctx = avctx;
     avctx->pix_fmt= PIX_FMT_PAL8;
-    a->refdata = av_malloc(avctx->width * avctx->height);
 
     return 0;
 }
@@ -303,11 +307,13 @@ static av_cold int decode_init(AVCodecContext *avctx){
 static av_cold int decode_end(AVCodecContext *avctx){
     QpegContext * const a = avctx->priv_data;
     AVFrame * const p= (AVFrame*)&a->pic;
+    AVFrame * const ref= (AVFrame*)&a->ref;
 
     if(p->data[0])
         avctx->release_buffer(avctx, p);
+    if(ref->data[0])
+        avctx->release_buffer(avctx, ref);
 
-    av_free(a->refdata);
     return 0;
 }
 
diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c
index 1fe947fa9c..bb17490a12 100644
--- a/libavcodec/ratecontrol.c
+++ b/libavcodec/ratecontrol.c
@@ -806,14 +806,6 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
         rcc->last_mc_mb_var_sum= pic->mc_mb_var_sum;
         rcc->last_mb_var_sum= pic->mb_var_sum;
     }
-#if 0
-{
-    static int mvsum=0, texsum=0;
-    mvsum += s->mv_bits;
-    texsum += s->i_tex_bits + s->p_tex_bits;
-    printf("%d %d//\n\n", mvsum, texsum);
-}
-#endif
     return q;
 }
 
diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index e607148a2b..0a865f470a 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c
@@ -114,7 +114,7 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { PIX_FMT_YUV444P16LE, MKTAG('Y', '3',  0 , 16 ) },
     { PIX_FMT_YUV444P16BE, MKTAG(16 ,  0 , '3', 'Y') },
     { PIX_FMT_YUVA420P,    MKTAG('Y', '4', 11 ,  8 ) },
-    { PIX_FMT_Y400A,       MKTAG('Y', '2',  0 ,  8 ) },
+    { PIX_FMT_GRAY8A,      MKTAG('Y', '2',  0 ,  8 ) },
 
     /* quicktime */
     { PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') },
diff --git a/libavcodec/sh4/idct_sh4.c b/libavcodec/sh4/idct_sh4.c
index 8d1a31670f..13a85b030e 100644
--- a/libavcodec/sh4/idct_sh4.c
+++ b/libavcodec/sh4/idct_sh4.c
@@ -54,8 +54,6 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
 #undef  c6
 #undef  c7
 
-#if 1
-
 #define         load_matrix(table) \
     do { \
         const float *t = table; \
@@ -84,52 +82,11 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
         register float fr2 __asm__("fr2"); \
         register float fr3 __asm__("fr3")
 
-#else
-
-/* generic C code for check */
-
-static void ftrv_(const float xf[],float fv[])
-{
-        float f0,f1,f2,f3;
-        f0 = fv[0];
-        f1 = fv[1];
-        f2 = fv[2];
-        f3 = fv[3];
-        fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
-        fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
-        fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
-        fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
-}
-
-static void load_matrix_(float xf[],const float table[])
-{
-        int i;
-        for(i=0;i<16;i++) xf[i]=table[i];
-}
-
-#define         ftrv()                  ftrv_(xf,fv)
-#define         load_matrix(table)      load_matrix_(xf,table)
-
-#define         DEFREG \
-        float fv[4],xf[16]
-
-#define         fr0     fv[0]
-#define         fr1     fv[1]
-#define         fr2     fv[2]
-#define         fr3     fv[3]
-
-#endif
-
-#if 1
 #define         DESCALE(x,n)    (x)*(1.0f/(1<<(n)))
-#else
-#define         DESCALE(x,n)    (((int)(x)+(1<<(n-1)))>>(n))
-#endif
 
 /* this code work worse on gcc cvs. 3.2.3 work fine */
 
 
-#if 1
 //optimized
 
 void idct_sh4(DCTELEM *block)
@@ -252,111 +209,3 @@ void idct_sh4(DCTELEM *block)
 
         fp_single_leave(fpscr);
 }
-#else
-void idct_sh4(DCTELEM *block)
-{
-        DEFREG;
-
-        int i;
-        float   tblock[8*8],*fblock;
-
-        /* row */
-
-        /* even part */
-        load_matrix(even_table);
-
-        fblock = tblock;
-        i = 8;
-        do {
-                fr0 = block[0];
-                fr1 = block[2];
-                fr2 = block[4];
-                fr3 = block[6];
-                block+=8;
-                ftrv();
-                fblock[0] = fr0;
-                fblock[2] = fr1;
-                fblock[4] = fr2;
-                fblock[6] = fr3;
-                fblock+=8;
-        } while(--i);
-        block-=8*8;
-        fblock-=8*8;
-
-        load_matrix(odd_table);
-
-        i = 8;
-
-        do {
-                float t0,t1,t2,t3;
-                fr0 = block[1];
-                fr1 = block[3];
-                fr2 = block[5];
-                fr3 = block[7];
-                block+=8;
-                ftrv();
-                t0 = fblock[0];
-                t1 = fblock[2];
-                t2 = fblock[4];
-                t3 = fblock[6];
-                fblock[0] = t0 + fr0;
-                fblock[7] = t0 - fr0;
-                fblock[1] = t1 + fr1;
-                fblock[6] = t1 - fr1;
-                fblock[2] = t2 + fr2;
-                fblock[5] = t2 - fr2;
-                fblock[3] = t3 + fr3;
-                fblock[4] = t3 - fr3;
-                fblock+=8;
-        } while(--i);
-        block-=8*8;
-        fblock-=8*8;
-
-        /* col */
-
-        /* even part */
-        load_matrix(even_table);
-
-        i = 8;
-
-        do {
-                fr0 = fblock[8*0];
-                fr1 = fblock[8*2];
-                fr2 = fblock[8*4];
-                fr3 = fblock[8*6];
-                ftrv();
-                fblock[8*0] = fr0;
-                fblock[8*2] = fr1;
-                fblock[8*4] = fr2;
-                fblock[8*6] = fr3;
-                fblock++;
-        } while(--i);
-        fblock-=8;
-
-        load_matrix(odd_table);
-
-        i=8;
-        do {
-                float t0,t1,t2,t3;
-                fr0 = fblock[8*1];
-                fr1 = fblock[8*3];
-                fr2 = fblock[8*5];
-                fr3 = fblock[8*7];
-                ftrv();
-                t0 = fblock[8*0];
-                t1 = fblock[8*2];
-                t2 = fblock[8*4];
-                t3 = fblock[8*6];
-                fblock++;
-                block[8*0] = DESCALE(t0 + fr0,3);
-                block[8*7] = DESCALE(t0 - fr0,3);
-                block[8*1] = DESCALE(t1 + fr1,3);
-                block[8*6] = DESCALE(t1 - fr1,3);
-                block[8*2] = DESCALE(t2 + fr2,3);
-                block[8*5] = DESCALE(t2 - fr2,3);
-                block[8*3] = DESCALE(t3 + fr3,3);
-                block[8*4] = DESCALE(t3 - fr3,3);
-                block++;
-        } while(--i);
-}
-#endif
diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index e3f00b8415..e6c3460d73 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -360,8 +360,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
 
     if(buf_size <= 769)
         return 0;
-    if(smk->pic.data[0])
-            avctx->release_buffer(avctx, &smk->pic);
 
     smk->pic.reference = 1;
     smk->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index dd31edaf85..1af978f21b 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -107,7 +107,7 @@ AVCodec ff_sp5x_decoder = {
     sp5x_decode_frame,
     CODEC_CAP_DR1,
     NULL,
-    .max_lowres = 5,
+    .max_lowres = 3,
     .long_name = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"),
 };
 
diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index cb84ea98a3..04b12b756c 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -378,13 +378,6 @@ static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
   if(x + (mv.x >> 1)<0)
      mv.x= 0;
 
-#if 0
-  int w= (s->width+15)&~15;
-  int h= (s->height+15)&~15;
-  if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
-      av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
-#endif
-
   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
   dst = current;
 
@@ -461,12 +454,6 @@ static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
     if(x + (mvx >> 1)<0)
        mvx= 0;
 
-#if 0
-  int w= (s->width+15)&~15;
-  int h= (s->height+15)&~15;
-  if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
-      av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
-#endif
     src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
     dst = current;
 
diff --git a/libavcodec/targa.c b/libavcodec/targa.c
index 3c220f4082..6f9bc13151 100644
--- a/libavcodec/targa.c
+++ b/libavcodec/targa.c
@@ -196,10 +196,10 @@ static int decode_frame(AVCodecContext *avctx,
             int r, g, b, t;
             int32_t *pal = ((int32_t*)p->data[1]) + first_clr;
             for(t = 0; t < colors; t++){
-                r = *buf++;
-                g = *buf++;
                 b = *buf++;
-                *pal++ = (b << 16) | (g << 8) | r;
+                g = *buf++;
+                r = *buf++;
+                *pal++ = (0xff<<24) | (r << 16) | (g << 8) | b;
             }
             p->palette_has_changed = 1;
         }
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 8295a4b4c0..d43999b2f0 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -429,7 +429,8 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
         bp = buf + count / 3 * off * 2;
         off = (type_sizes[type] - 1) << 3;
         for(i = 0; i < count / 3; i++){
-            j = (tget(&rp, type, s->le) >> off) << 16;
+            j = 0xff << 24;
+            j |= (tget(&rp, type, s->le) >> off) << 16;
             j |= (tget(&gp, type, s->le) >> off) << 8;
             j |= tget(&bp, type, s->le) >> off;
             pal[i] = j;
diff --git a/libavcodec/tta.c b/libavcodec/tta.c
index ece5c1c43b..3367788633 100644
--- a/libavcodec/tta.c
+++ b/libavcodec/tta.c
@@ -205,6 +205,16 @@ static int tta_get_unary(GetBitContext *gb)
     return ret;
 }
 
+static const int64_t tta_channel_layouts[7] = {
+    AV_CH_LAYOUT_STEREO,
+    AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY,
+    AV_CH_LAYOUT_QUAD,
+    0,
+    AV_CH_LAYOUT_5POINT1_BACK,
+    AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER,
+    AV_CH_LAYOUT_7POINT1_WIDE
+};
+
 static av_cold int tta_decode_init(AVCodecContext * avctx)
 {
     TTAContext *s = avctx->priv_data;
@@ -234,6 +244,8 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
         }
         s->is_float = (s->flags == FORMAT_FLOAT);
         avctx->channels = s->channels = get_bits(&s->gb, 16);
+        if (s->channels > 1 && s->channels < 9)
+            avctx->channel_layout = tta_channel_layouts[s->channels-2];
         avctx->bits_per_coded_sample = get_bits(&s->gb, 16);
         s->bps = (avctx->bits_per_coded_sample + 7) / 8;
         avctx->sample_rate = get_bits_long(&s->gb, 32);
@@ -286,6 +298,8 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
         }
 
         s->decode_buffer = av_mallocz(sizeof(int32_t)*s->frame_length*s->channels);
+        if (!s->decode_buffer)
+            return AVERROR(ENOMEM);
         s->ch_ctx = av_malloc(avctx->channels * sizeof(*s->ch_ctx));
         if (!s->ch_ctx)
             return AVERROR(ENOMEM);
diff --git a/libavcodec/ulti.c b/libavcodec/ulti.c
index bb1270f055..83a66ab85e 100644
--- a/libavcodec/ulti.c
+++ b/libavcodec/ulti.c
@@ -224,13 +224,10 @@ static int ulti_decode_frame(AVCodecContext *avctx,
     int skip;
     int tmp;
 
-    if(s->frame.data[0])
-        avctx->release_buffer(avctx, &s->frame);
-
     s->frame.reference = 1;
     s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
-    if(avctx->get_buffer(avctx, &s->frame) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if (avctx->reget_buffer(avctx, &s->frame) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
         return -1;
     }
 
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 582d88cec4..a350605f97 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -568,15 +568,50 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec)
         ret = AVERROR(EINVAL);
         goto free_and_end;
     }
-    if (avctx->codec->sample_fmts && avctx->codec->encode) {
+    if (avctx->codec->encode) {
         int i;
-        for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++)
-            if (avctx->sample_fmt == avctx->codec->sample_fmts[i])
-                break;
-        if (avctx->codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) {
-            av_log(avctx, AV_LOG_ERROR, "Specified sample_fmt is not supported.\n");
-            ret = AVERROR(EINVAL);
-            goto free_and_end;
+        if (avctx->codec->sample_fmts) {
+            for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++)
+                if (avctx->sample_fmt == avctx->codec->sample_fmts[i])
+                    break;
+            if (avctx->codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) {
+                av_log(avctx, AV_LOG_ERROR, "Specified sample_fmt is not supported.\n");
+                ret = AVERROR(EINVAL);
+                goto free_and_end;
+            }
+        }
+        if (avctx->codec->supported_samplerates) {
+            for (i = 0; avctx->codec->supported_samplerates[i] != 0; i++)
+                if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
+                    break;
+            if (avctx->codec->supported_samplerates[i] == 0) {
+                av_log(avctx, AV_LOG_ERROR, "Specified sample_rate is not supported\n");
+                ret = AVERROR(EINVAL);
+                goto free_and_end;
+            }
+        }
+        if (avctx->codec->channel_layouts) {
+            if (!avctx->channel_layout) {
+                av_log(avctx, AV_LOG_WARNING, "channel_layout not specified\n");
+            } else {
+                for (i = 0; avctx->codec->channel_layouts[i] != 0; i++)
+                    if (avctx->channel_layout == avctx->codec->channel_layouts[i])
+                        break;
+                if (avctx->codec->channel_layouts[i] == 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Specified channel_layout is not supported\n");
+                    ret = AVERROR(EINVAL);
+                    goto free_and_end;
+                }
+            }
+        }
+        if (avctx->channel_layout && avctx->channels) {
+            if (av_get_channel_layout_nb_channels(avctx->channel_layout) != avctx->channels) {
+                av_log(avctx, AV_LOG_ERROR, "channel layout does not match number of channels\n");
+                ret = AVERROR(EINVAL);
+                goto free_and_end;
+            }
+        } else if (avctx->channel_layout) {
+            avctx->channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
         }
     }
 
@@ -688,23 +723,6 @@ static int64_t guess_correct_pts(AVCodecContext *ctx,
     return pts;
 }
 
-
-#if FF_API_VIDEO_OLD
-int attribute_align_arg avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
-                         int *got_picture_ptr,
-                         const uint8_t *buf, int buf_size)
-{
-    AVPacket avpkt;
-    av_init_packet(&avpkt);
-    avpkt.data = buf;
-    avpkt.size = buf_size;
-    // HACK for CorePNG to decode as normal PNG by default
-    avpkt.flags = AV_PKT_FLAG_KEY;
-
-    return avcodec_decode_video2(avctx, picture, got_picture_ptr, &avpkt);
-}
-#endif
-
 int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
                          int *got_picture_ptr,
                          AVPacket *avpkt)
@@ -1249,11 +1267,9 @@ void av_log_missing_feature(void *avc, const char *feature, int want_sample)
     av_log(avc, AV_LOG_WARNING, "%s not implemented. Update your FFmpeg "
             "version to the newest one from Git. If the problem still "
             "occurs, it means that your file has a feature which has not "
-            "been implemented.", feature);
+            "been implemented.\n", feature);
     if(want_sample)
         av_log_ask_for_sample(avc, NULL);
-    else
-        av_log(avc, AV_LOG_WARNING, "\n");
 }
 
 void av_log_ask_for_sample(void *avc, const char *msg, ...)
diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index 47388d8302..68acc77ab9 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c
@@ -51,14 +51,13 @@ unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n)
 // the two bits[p] > 32 checks should be redundant, all calling code should
 // already ensure that, but since it allows overwriting the stack it seems
 // reasonable to check redundantly.
-int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num)
+int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
 {
     uint_fast32_t exit_at_level[33] = {
         404, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
-    uint_fast8_t i, j;
-    uint_fast32_t code, p;
+    unsigned i, j, p, code;
 
 #ifdef V_DEBUG
     GetBitContext gb;
@@ -78,8 +77,8 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num)
         exit_at_level[i+1] = 1 << i;
 
 #ifdef V_DEBUG
-    av_log(NULL, AV_LOG_INFO, " %d. of %d code len %d code %d - ", p, num, bits[p], codes[p]);
-    init_get_bits(&gb, (uint_fast8_t *)&codes[p], bits[p]);
+    av_log(NULL, AV_LOG_INFO, " %u. of %u code len %d code %d - ", p, num, bits[p], codes[p]);
+    init_get_bits(&gb, (uint8_t *)&codes[p], bits[p]);
     for (i = 0; i < bits[p]; ++i)
         av_log(NULL, AV_LOG_INFO, "%s", get_bits1(&gb) ? "1" : "0");
     av_log(NULL, AV_LOG_INFO, "\n");
diff --git a/libavcodec/vorbis.h b/libavcodec/vorbis.h
index 64bade62bf..28c1bcf636 100644
--- a/libavcodec/vorbis.h
+++ b/libavcodec/vorbis.h
@@ -38,7 +38,7 @@ typedef struct {
 
 void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values);
 unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n); // x^(1/n)
-int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num);
+int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num);
 void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values,
                                   uint_fast16_t * y_list, int * flag,
                                   int multiplier, float * out, int samples);
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index cad30a9ffa..161c54d6bc 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -177,7 +177,7 @@ static const char idx_err_str[] = "Index value %d out of range (0 - %d) for %s a
         VALIDATE_INDEX(idx, limit)\
     }
 
-static float vorbisfloat2float(uint_fast32_t val)
+static float vorbisfloat2float(unsigned val)
 {
     double mant = val & 0x1fffff;
     long exp    = (val & 0x7fe00000L) >> 21;
@@ -191,7 +191,7 @@ static float vorbisfloat2float(uint_fast32_t val)
 
 static void vorbis_free(vorbis_context *vc)
 {
-    int_fast16_t i;
+    int i;
 
     av_freep(&vc->channel_residues);
     av_freep(&vc->channel_floors);
@@ -237,7 +237,7 @@ static void vorbis_free(vorbis_context *vc)
 
 static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
 {
-    uint_fast16_t cb;
+    unsigned cb;
     uint8_t  *tmp_vlc_bits;
     uint32_t *tmp_vlc_codes;
     GetBitContext *gb = &vc->gb;
@@ -247,43 +247,46 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
 
     AV_DEBUG(" Codebooks: %d \n", vc->codebook_count);
 
-    vc->codebooks = av_mallocz(vc->codebook_count * sizeof(vorbis_codebook));
-    tmp_vlc_bits  = av_mallocz(V_MAX_VLCS * sizeof(uint8_t));
-    tmp_vlc_codes = av_mallocz(V_MAX_VLCS * sizeof(uint32_t));
+    vc->codebooks = av_mallocz(vc->codebook_count * sizeof(*vc->codebooks));
+    tmp_vlc_bits  = av_mallocz(V_MAX_VLCS * sizeof(*tmp_vlc_bits));
+    tmp_vlc_codes = av_mallocz(V_MAX_VLCS * sizeof(*tmp_vlc_codes));
     codebook_multiplicands = av_malloc(V_MAX_VLCS * sizeof(*codebook_multiplicands));
 
     for (cb = 0; cb < vc->codebook_count; ++cb) {
         vorbis_codebook *codebook_setup = &vc->codebooks[cb];
-        uint_fast8_t ordered;
-        uint_fast32_t t, used_entries = 0;
-        uint_fast32_t entries;
+        unsigned ordered, t, entries, used_entries = 0;
 
-        AV_DEBUG(" %d. Codebook \n", cb);
+        AV_DEBUG(" %u. Codebook\n", cb);
 
         if (get_bits(gb, 24) != 0x564342) {
-            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook setup data corrupt. \n", cb);
+            av_log(vc->avccontext, AV_LOG_ERROR,
+                   " %u. Codebook setup data corrupt.\n", cb);
             goto error;
         }
 
         codebook_setup->dimensions=get_bits(gb, 16);
         if (codebook_setup->dimensions > 16 || codebook_setup->dimensions == 0) {
-            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook's dimension is invalid (%d). \n", cb, codebook_setup->dimensions);
+            av_log(vc->avccontext, AV_LOG_ERROR,
+                   " %u. Codebook's dimension is invalid (%d).\n",
+                   cb, codebook_setup->dimensions);
             goto error;
         }
         entries = get_bits(gb, 24);
         if (entries > V_MAX_VLCS) {
-            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook has too many entries (%"PRIdFAST32"). \n", cb, entries);
+            av_log(vc->avccontext, AV_LOG_ERROR,
+                   " %u. Codebook has too many entries (%u).\n",
+                   cb, entries);
             goto error;
         }
 
         ordered = get_bits1(gb);
 
-        AV_DEBUG(" codebook_dimensions %d, codebook_entries %d \n", codebook_setup->dimensions, entries);
+        AV_DEBUG(" codebook_dimensions %d, codebook_entries %u\n",
+                 codebook_setup->dimensions, entries);
 
         if (!ordered) {
-            uint_fast16_t ce;
-            uint_fast8_t  flag;
-            uint_fast8_t  sparse = get_bits1(gb);
+            unsigned ce, flag;
+            unsigned sparse = get_bits1(gb);
 
             AV_DEBUG(" not ordered \n");
 
@@ -307,20 +310,20 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
                     tmp_vlc_bits[ce] = get_bits(gb, 5) + 1;
             }
         } else {
-            uint_fast16_t current_entry = 0;
-            uint_fast8_t current_length = get_bits(gb, 5)+1;
+            unsigned current_entry  = 0;
+            unsigned current_length = get_bits(gb, 5) + 1;
 
-            AV_DEBUG(" ordered, current length: %d \n", current_length);  //FIXME
+            AV_DEBUG(" ordered, current length: %u\n", current_length);  //FIXME
 
             used_entries = entries;
             for (; current_entry < used_entries && current_length <= 32; ++current_length) {
-                uint_fast16_t i, number;
+                unsigned i, number;
 
-                AV_DEBUG(" number bits: %d ", ilog(entries - current_entry));
+                AV_DEBUG(" number bits: %u ", ilog(entries - current_entry));
 
                 number = get_bits(gb, ilog(entries - current_entry));
 
-                AV_DEBUG(" number: %d \n", number);
+                AV_DEBUG(" number: %u\n", number);
 
                 for (i = current_entry; i < number+current_entry; ++i)
                     if (i < used_entries)
@@ -341,13 +344,13 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
 // If the codebook is used for (inverse) VQ, calculate codevectors.
 
         if (codebook_setup->lookup_type == 1) {
-            uint_fast16_t i, j, k;
-            uint_fast16_t codebook_lookup_values = ff_vorbis_nth_root(entries, codebook_setup->dimensions);
+            unsigned i, j, k;
+            unsigned codebook_lookup_values = ff_vorbis_nth_root(entries, codebook_setup->dimensions);
 
             float codebook_minimum_value = vorbisfloat2float(get_bits_long(gb, 32));
             float codebook_delta_value   = vorbisfloat2float(get_bits_long(gb, 32));
-            uint_fast8_t codebook_value_bits = get_bits(gb, 4)+1;
-            uint_fast8_t codebook_sequence_p = get_bits1(gb);
+            unsigned codebook_value_bits = get_bits(gb, 4) + 1;
+            unsigned codebook_sequence_p = get_bits1(gb);
 
             AV_DEBUG(" We expect %d numbers for building the codevectors. \n", codebook_lookup_values);
             AV_DEBUG("  delta %f minmum %f \n", codebook_delta_value, codebook_minimum_value);
@@ -360,20 +363,23 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
             }
 
 // Weed out unused vlcs and build codevector vector
-            codebook_setup->codevectors = used_entries ? av_mallocz(used_entries*codebook_setup->dimensions * sizeof(float)) : NULL;
+            codebook_setup->codevectors = used_entries ? av_mallocz(used_entries *
+                                                                    codebook_setup->dimensions *
+                                                                    sizeof(*codebook_setup->codevectors))
+                                                       : NULL;
             for (j = 0, i = 0; i < entries; ++i) {
-                uint_fast8_t dim = codebook_setup->dimensions;
+                unsigned dim = codebook_setup->dimensions;
 
                 if (tmp_vlc_bits[i]) {
                     float last = 0.0;
-                    uint_fast32_t lookup_offset = i;
+                    unsigned lookup_offset = i;
 
 #ifdef V_DEBUG
-                    av_log(vc->avccontext, AV_LOG_INFO, "Lookup offset %d ,", i);
+                    av_log(vc->avccontext, AV_LOG_INFO, "Lookup offset %u ,", i);
 #endif
 
                     for (k = 0; k < dim; ++k) {
-                        uint_fast32_t multiplicand_offset = lookup_offset % codebook_lookup_values;
+                        unsigned multiplicand_offset = lookup_offset % codebook_lookup_values;
                         codebook_setup->codevectors[j * dim + k] = codebook_multiplicands[multiplicand_offset] * codebook_delta_value + codebook_minimum_value + last;
                         if (codebook_sequence_p)
                             last = codebook_setup->codevectors[j * dim + k];
@@ -382,7 +388,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
                     tmp_vlc_bits[j] = tmp_vlc_bits[i];
 
 #ifdef V_DEBUG
-                    av_log(vc->avccontext, AV_LOG_INFO, "real lookup offset %d, vector: ", j);
+                    av_log(vc->avccontext, AV_LOG_INFO, "real lookup offset %u, vector: ", j);
                     for (k = 0; k < dim; ++k)
                         av_log(vc->avccontext, AV_LOG_INFO, " %f ", codebook_setup->codevectors[j * dim + k]);
                     av_log(vc->avccontext, AV_LOG_INFO, "\n");
@@ -442,13 +448,13 @@ error:
 static int vorbis_parse_setup_hdr_tdtransforms(vorbis_context *vc)
 {
     GetBitContext *gb = &vc->gb;
-    uint_fast8_t i;
-    uint_fast8_t vorbis_time_count = get_bits(gb, 6) + 1;
+    unsigned i, vorbis_time_count = get_bits(gb, 6) + 1;
 
     for (i = 0; i < vorbis_time_count; ++i) {
-        uint_fast16_t vorbis_tdtransform = get_bits(gb, 16);
+        unsigned vorbis_tdtransform = get_bits(gb, 16);
 
-        AV_DEBUG(" Vorbis time domain transform %d: %d \n", vorbis_time_count, vorbis_tdtransform);
+        AV_DEBUG(" Vorbis time domain transform %u: %u\n",
+                 vorbis_time_count, vorbis_tdtransform);
 
         if (vorbis_tdtransform) {
             av_log(vc->avccontext, AV_LOG_ERROR, "Vorbis time domain transform data nonzero. \n");
@@ -462,7 +468,7 @@ static int vorbis_parse_setup_hdr_tdtransforms(vorbis_context *vc)
 
 static int vorbis_floor0_decode(vorbis_context *vc,
                                 vorbis_floor_data *vfu, float *vec);
-static void create_map(vorbis_context *vc, uint_fast8_t floor_number);
+static void create_map(vorbis_context *vc, unsigned floor_number);
 static int vorbis_floor1_decode(vorbis_context *vc,
                                 vorbis_floor_data *vfu, float *vec);
 static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
@@ -472,7 +478,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
 
     vc->floor_count = get_bits(gb, 6) + 1;
 
-    vc->floors = av_mallocz(vc->floor_count * sizeof(vorbis_floor));
+    vc->floors = av_mallocz(vc->floor_count * sizeof(*vc->floors));
 
     for (i = 0; i < vc->floor_count; ++i) {
         vorbis_floor *floor_setup = &vc->floors[i];
@@ -483,9 +489,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
 
         if (floor_setup->floor_type == 1) {
             int maximum_class = -1;
-            uint_fast8_t  rangebits;
-            uint_fast32_t rangemax;
-            uint_fast16_t floor1_values = 2;
+            unsigned rangebits, rangemax, floor1_values = 2;
 
             floor_setup->decode = vorbis_floor1_decode;
 
@@ -532,14 +536,15 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
             for (j = 0; j < floor_setup->data.t1.partitions; ++j)
                 floor_setup->data.t1.x_list_dim+=floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];
 
-            floor_setup->data.t1.list = av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(vorbis_floor1_entry));
+            floor_setup->data.t1.list = av_mallocz(floor_setup->data.t1.x_list_dim *
+                                                   sizeof(*floor_setup->data.t1.list));
 
 
             rangebits = get_bits(gb, 4);
             rangemax = (1 << rangebits);
             if (rangemax > vc->blocksize[1] / 2) {
                 av_log(vc->avccontext, AV_LOG_ERROR,
-                       "Floor value is too large for blocksize: %"PRIuFAST32" (%"PRIuFAST32")\n",
+                       "Floor value is too large for blocksize: %u (%"PRIuFAST32")\n",
                        rangemax, vc->blocksize[1] / 2);
                 return -1;
             }
@@ -550,14 +555,15 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
                 for (k = 0; k < floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]]; ++k, ++floor1_values) {
                     floor_setup->data.t1.list[floor1_values].x = get_bits(gb, rangebits);
 
-                    AV_DEBUG(" %d. floor1 Y coord. %d \n", floor1_values, floor_setup->data.t1.list[floor1_values].x);
+                    AV_DEBUG(" %u. floor1 Y coord. %d\n", floor1_values,
+                             floor_setup->data.t1.list[floor1_values].x);
                 }
             }
 
 // Precalculate order of x coordinates - needed for decode
             ff_vorbis_ready_floor1_list(floor_setup->data.t1.list, floor_setup->data.t1.x_list_dim);
         } else if (floor_setup->floor_type == 0) {
-            uint_fast8_t max_codebook_dim = 0;
+            unsigned max_codebook_dim = 0;
 
             floor_setup->decode = vorbis_floor0_decode;
 
@@ -583,7 +589,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
             /* read book indexes */
             {
                 int idx;
-                uint_fast8_t book_idx;
+                unsigned book_idx;
                 for (idx = 0; idx < floor_setup->data.t0.num_books; ++idx) {
                     GET_VALIDATED_INDEX(book_idx, 8, vc->codebook_count)
                     floor_setup->data.t0.book_list[idx] = book_idx;
@@ -597,8 +603,8 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
             /* codebook dim is for padding if codebook dim doesn't *
              * divide order+1 then we need to read more data       */
             floor_setup->data.t0.lsp =
-                av_malloc((floor_setup->data.t0.order+1 + max_codebook_dim)
-                          * sizeof(float));
+                av_malloc((floor_setup->data.t0.order + 1 + max_codebook_dim)
+                          * sizeof(*floor_setup->data.t0.lsp));
             if (!floor_setup->data.t0.lsp)
                 return -1;
 
@@ -637,22 +643,21 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
 static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
 {
     GetBitContext *gb = &vc->gb;
-    uint_fast8_t i, j, k;
+    unsigned i, j, k;
 
     vc->residue_count = get_bits(gb, 6)+1;
-    vc->residues      = av_mallocz(vc->residue_count * sizeof(vorbis_residue));
+    vc->residues      = av_mallocz(vc->residue_count * sizeof(*vc->residues));
 
     AV_DEBUG(" There are %d residues. \n", vc->residue_count);
 
     for (i = 0; i < vc->residue_count; ++i) {
         vorbis_residue *res_setup = &vc->residues[i];
         uint_fast8_t cascade[64];
-        uint_fast8_t high_bits;
-        uint_fast8_t low_bits;
+        unsigned high_bits, low_bits;
 
         res_setup->type = get_bits(gb, 16);
 
-        AV_DEBUG(" %d. residue type %d \n", i, res_setup->type);
+        AV_DEBUG(" %u. residue type %d\n", i, res_setup->type);
 
         res_setup->begin          = get_bits(gb, 24);
         res_setup->end            = get_bits(gb, 24);
@@ -686,7 +691,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
                 high_bits = get_bits(gb, 5);
             cascade[j] = (high_bits << 3) + low_bits;
 
-            AV_DEBUG("     %d class casscade depth: %d \n", j, ilog(cascade[j]));
+            AV_DEBUG("     %u class cascade depth: %d\n", j, ilog(cascade[j]));
         }
 
         res_setup->maxpass = 0;
@@ -695,7 +700,8 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
                 if (cascade[j]&(1 << k)) {
                     GET_VALIDATED_INDEX(res_setup->books[j][k], 8, vc->codebook_count)
 
-                    AV_DEBUG("     %d class casscade depth %d book: %d \n", j, k, res_setup->books[j][k]);
+                    AV_DEBUG("     %u class cascade depth %u book: %d\n",
+                             j, k, res_setup->books[j][k]);
 
                     if (k>res_setup->maxpass)
                         res_setup->maxpass = k;
@@ -713,10 +719,10 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
 static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
 {
     GetBitContext *gb = &vc->gb;
-    uint_fast8_t i, j;
+    unsigned i, j;
 
     vc->mapping_count = get_bits(gb, 6)+1;
-    vc->mappings      = av_mallocz(vc->mapping_count * sizeof(vorbis_mapping));
+    vc->mappings      = av_mallocz(vc->mapping_count * sizeof(*vc->mappings));
 
     AV_DEBUG(" There are %d mappings. \n", vc->mapping_count);
 
@@ -735,8 +741,10 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
 
         if (get_bits1(gb)) {
             mapping_setup->coupling_steps = get_bits(gb, 8) + 1;
-            mapping_setup->magnitude      = av_mallocz(mapping_setup->coupling_steps * sizeof(uint_fast8_t));
-            mapping_setup->angle          = av_mallocz(mapping_setup->coupling_steps * sizeof(uint_fast8_t));
+            mapping_setup->magnitude      = av_mallocz(mapping_setup->coupling_steps *
+                                                       sizeof(*mapping_setup->magnitude));
+            mapping_setup->angle          = av_mallocz(mapping_setup->coupling_steps *
+                                                       sizeof(*mapping_setup->angle));
             for (j = 0; j < mapping_setup->coupling_steps; ++j) {
                 GET_VALIDATED_INDEX(mapping_setup->magnitude[j], ilog(vc->audio_channels - 1), vc->audio_channels)
                 GET_VALIDATED_INDEX(mapping_setup->angle[j],     ilog(vc->audio_channels - 1), vc->audio_channels)
@@ -745,15 +753,17 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
             mapping_setup->coupling_steps = 0;
         }
 
-        AV_DEBUG("   %d mapping coupling steps: %d \n", i, mapping_setup->coupling_steps);
+        AV_DEBUG("   %u mapping coupling steps: %d\n",
+                 i, mapping_setup->coupling_steps);
 
         if (get_bits(gb, 2)) {
-            av_log(vc->avccontext, AV_LOG_ERROR, "%d. mapping setup data invalid. \n", i);
+            av_log(vc->avccontext, AV_LOG_ERROR, "%u. mapping setup data invalid.\n", i);
             return -1; // following spec.
         }
 
         if (mapping_setup->submaps>1) {
-            mapping_setup->mux = av_mallocz(vc->audio_channels * sizeof(uint_fast8_t));
+            mapping_setup->mux = av_mallocz(vc->audio_channels *
+                                            sizeof(*mapping_setup->mux));
             for (j = 0; j < vc->audio_channels; ++j)
                 mapping_setup->mux[j] = get_bits(gb, 4);
         }
@@ -763,7 +773,10 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
             GET_VALIDATED_INDEX(mapping_setup->submap_floor[j],   8, vc->floor_count)
             GET_VALIDATED_INDEX(mapping_setup->submap_residue[j], 8, vc->residue_count)
 
-            AV_DEBUG("   %d mapping %d submap : floor %d, residue %d \n", i, j, mapping_setup->submap_floor[j], mapping_setup->submap_residue[j]);
+            AV_DEBUG("   %u mapping %u submap : floor %d, residue %d\n",
+                     i, j,
+                     mapping_setup->submap_floor[j],
+                     mapping_setup->submap_residue[j]);
         }
     }
     return 0;
@@ -771,14 +784,13 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
 
 // Process modes part
 
-static void create_map(vorbis_context *vc, uint_fast8_t floor_number)
+static void create_map(vorbis_context *vc, unsigned floor_number)
 {
     vorbis_floor *floors = vc->floors;
     vorbis_floor0 *vf;
     int idx;
-    int_fast8_t blockflag;
+    int blockflag, n;
     int_fast32_t *map;
-    int_fast32_t n; //TODO: could theoretically be smaller?
 
     for (blockflag = 0; blockflag < 2; ++blockflag) {
         n = vc->blocksize[blockflag] / 2;
@@ -810,10 +822,10 @@ static void create_map(vorbis_context *vc, uint_fast8_t floor_number)
 static int vorbis_parse_setup_hdr_modes(vorbis_context *vc)
 {
     GetBitContext *gb = &vc->gb;
-    uint_fast8_t i;
+    unsigned i;
 
     vc->mode_count = get_bits(gb, 6) + 1;
-    vc->modes      = av_mallocz(vc->mode_count * sizeof(vorbis_mode));
+    vc->modes      = av_mallocz(vc->mode_count * sizeof(*vc->modes));
 
     AV_DEBUG(" There are %d modes.\n", vc->mode_count);
 
@@ -825,7 +837,9 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context *vc)
         mode_setup->transformtype = get_bits(gb, 16); //FIXME check
         GET_VALIDATED_INDEX(mode_setup->mapping, 8, vc->mapping_count);
 
-        AV_DEBUG(" %d mode: blockflag %d, windowtype %d, transformtype %d, mapping %d \n", i, mode_setup->blockflag, mode_setup->windowtype, mode_setup->transformtype, mode_setup->mapping);
+        AV_DEBUG(" %u mode: blockflag %d, windowtype %d, transformtype %d, mapping %d\n",
+                 i, mode_setup->blockflag, mode_setup->windowtype,
+                 mode_setup->transformtype, mode_setup->mapping);
     }
     return 0;
 }
@@ -880,7 +894,7 @@ static int vorbis_parse_setup_hdr(vorbis_context *vc)
 static int vorbis_parse_id_hdr(vorbis_context *vc)
 {
     GetBitContext *gb = &vc->gb;
-    uint_fast8_t bl0, bl1;
+    unsigned bl0, bl1;
 
     if ((get_bits(gb, 8) != 'v') || (get_bits(gb, 8) != 'o') ||
         (get_bits(gb, 8) != 'r') || (get_bits(gb, 8) != 'b') ||
@@ -925,9 +939,9 @@ static int vorbis_parse_id_hdr(vorbis_context *vc)
         return -2;
     }
 
-    vc->channel_residues =  av_malloc((vc->blocksize[1]  / 2) * vc->audio_channels * sizeof(float));
-    vc->channel_floors   =  av_malloc((vc->blocksize[1]  / 2) * vc->audio_channels * sizeof(float));
-    vc->saved            =  av_mallocz((vc->blocksize[1] / 4) * vc->audio_channels * sizeof(float));
+    vc->channel_residues =  av_malloc((vc->blocksize[1]  / 2) * vc->audio_channels * sizeof(*vc->channel_residues));
+    vc->channel_floors   =  av_malloc((vc->blocksize[1]  / 2) * vc->audio_channels * sizeof(*vc->channel_floors));
+    vc->saved            =  av_mallocz((vc->blocksize[1] / 4) * vc->audio_channels * sizeof(*vc->saved));
     vc->previous_window  = 0;
 
     ff_mdct_init(&vc->mdct[0], bl0, 1, -vc->scale_bias);
@@ -1023,15 +1037,13 @@ static int vorbis_floor0_decode(vorbis_context *vc,
 {
     vorbis_floor0 *vf = &vfu->t0;
     float *lsp = vf->lsp;
-    uint_fast32_t amplitude;
-    uint_fast32_t book_idx;
-    uint_fast8_t blockflag = vc->modes[vc->mode_number].blockflag;
+    unsigned amplitude, book_idx;
+    unsigned blockflag = vc->modes[vc->mode_number].blockflag;
 
     amplitude = get_bits(&vc->gb, vf->amplitude_bits);
     if (amplitude > 0) {
         float last = 0;
-        uint_fast16_t lsp_len = 0;
-        uint_fast16_t idx;
+        unsigned idx, lsp_len = 0;
         vorbis_codebook codebook;
 
         book_idx = get_bits(&vc->gb, ilog(vf->num_books));
@@ -1134,20 +1146,12 @@ static int vorbis_floor1_decode(vorbis_context *vc,
     vorbis_floor1 *vf = &vfu->t1;
     GetBitContext *gb = &vc->gb;
     uint_fast16_t range_v[4] = { 256, 128, 86, 64 };
-    uint_fast16_t range = range_v[vf->multiplier-1];
+    unsigned range = range_v[vf->multiplier-1];
     uint_fast16_t floor1_Y[258];
     uint_fast16_t floor1_Y_final[258];
     int floor1_flag[258];
-    uint_fast8_t class_;
-    uint_fast8_t cdim;
-    uint_fast8_t cbits;
-    uint_fast8_t csub;
-    uint_fast8_t cval;
-    int_fast16_t book;
-    uint_fast16_t offset;
-    uint_fast16_t i,j;
-    int_fast16_t adx, ady, dy, off, predicted;
-    int_fast32_t err;
+    unsigned partition_class, cdim, cbits, csub, cval, offset, i, j;
+    int book, adx, ady, dy, off, predicted, err;
 
 
     if (!get_bits1(gb)) // silence
@@ -1162,22 +1166,23 @@ static int vorbis_floor1_decode(vorbis_context *vc,
 
     offset = 2;
     for (i = 0; i < vf->partitions; ++i) {
-        class_ = vf->partition_class[i];
-        cdim   = vf->class_dimensions[class_];
-        cbits  = vf->class_subclasses[class_];
+        partition_class = vf->partition_class[i];
+        cdim   = vf->class_dimensions[partition_class];
+        cbits  = vf->class_subclasses[partition_class];
         csub = (1 << cbits) - 1;
         cval = 0;
 
-        AV_DEBUG("Cbits %d \n", cbits);
+        AV_DEBUG("Cbits %u\n", cbits);
 
         if (cbits) // this reads all subclasses for this partition's class
-            cval = get_vlc2(gb, vc->codebooks[vf->class_masterbook[class_]].vlc.table,
-                            vc->codebooks[vf->class_masterbook[class_]].nb_bits, 3);
+            cval = get_vlc2(gb, vc->codebooks[vf->class_masterbook[partition_class]].vlc.table,
+                            vc->codebooks[vf->class_masterbook[partition_class]].nb_bits, 3);
 
         for (j = 0; j < cdim; ++j) {
-            book = vf->subclass_books[class_][cval & csub];
+            book = vf->subclass_books[partition_class][cval & csub];
 
-            AV_DEBUG("book %d Cbits %d cval %d  bits:%d \n", book, cbits, cval, get_bits_count(gb));
+            AV_DEBUG("book %d Cbits %u cval %u  bits:%d\n",
+                     book, cbits, cval, get_bits_count(gb));
 
             cval = cval >> cbits;
             if (book > -1) {
@@ -1200,9 +1205,7 @@ static int vorbis_floor1_decode(vorbis_context *vc,
     floor1_Y_final[1] = floor1_Y[1];
 
     for (i = 2; i < vf->x_list_dim; ++i) {
-        uint_fast16_t val, highroom, lowroom, room;
-        uint_fast16_t high_neigh_offs;
-        uint_fast16_t low_neigh_offs;
+        unsigned val, highroom, lowroom, room, high_neigh_offs, low_neigh_offs;
 
         low_neigh_offs  = vf->list[i].low;
         high_neigh_offs = vf->list[i].high;
@@ -1247,7 +1250,8 @@ static int vorbis_floor1_decode(vorbis_context *vc,
             floor1_Y_final[i] = predicted;
         }
 
-        AV_DEBUG(" Decoded floor(%d) = %d / val %d \n", vf->list[i].x, floor1_Y_final[i], val);
+        AV_DEBUG(" Decoded floor(%d) = %d / val %u\n",
+                 vf->list[i].x, floor1_Y_final[i], val);
     }
 
 // Curve synth - connect the calculated dots and convert from dB scale FIXME optimize ?
@@ -1263,20 +1267,17 @@ static int vorbis_floor1_decode(vorbis_context *vc,
 
 static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
                                                            vorbis_residue *vr,
-                                                           uint_fast8_t ch,
+                                                           unsigned ch,
                                                            uint_fast8_t *do_not_decode,
                                                            float *vec,
-                                                           uint_fast16_t vlen,
+                                                           unsigned vlen,
                                                            int vr_type)
 {
     GetBitContext *gb = &vc->gb;
-    uint_fast8_t c_p_c = vc->codebooks[vr->classbook].dimensions;
-    uint_fast16_t ptns_to_read = vr->ptns_to_read;
+    unsigned c_p_c        = vc->codebooks[vr->classbook].dimensions;
+    unsigned ptns_to_read = vr->ptns_to_read;
     uint8_t *classifs = vr->classifs;
-    uint_fast8_t pass;
-    uint_fast8_t ch_used;
-    uint_fast8_t i,j,l;
-    uint_fast16_t k;
+    unsigned pass, ch_used, i, j, k, l;
 
     if (vr_type == 2) {
         for (j = 1; j < ch; ++j)
@@ -1291,26 +1292,24 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
     AV_DEBUG(" residue type 0/1/2 decode begin, ch: %d  cpc %d  \n", ch, c_p_c);
 
     for (pass = 0; pass <= vr->maxpass; ++pass) { // FIXME OPTIMIZE?
-        uint_fast16_t voffset;
-        uint_fast16_t partition_count;
-        uint_fast16_t j_times_ptns_to_read;
+        uint16_t voffset, partition_count, j_times_ptns_to_read;
 
         voffset = vr->begin;
         for (partition_count = 0; partition_count < ptns_to_read;) {  // SPEC        error
             if (!pass) {
-                uint_fast32_t inverse_class = ff_inverse[vr->classifications];
+                unsigned inverse_class = ff_inverse[vr->classifications];
                 for (j_times_ptns_to_read = 0, j = 0; j < ch_used; ++j) {
                     if (!do_not_decode[j]) {
-                        uint_fast32_t temp = get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table,
-                        vc->codebooks[vr->classbook].nb_bits, 3);
+                        unsigned temp = get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table,
+                                                 vc->codebooks[vr->classbook].nb_bits, 3);
 
-                        AV_DEBUG("Classword: %d \n", temp);
+                        AV_DEBUG("Classword: %u\n", temp);
 
                         assert(vr->classifications > 1 && temp <= 65536); //needed for inverse[]
                         for (i = 0; i < c_p_c; ++i) {
-                            uint_fast32_t temp2;
+                            unsigned temp2;
 
-                            temp2 = (((uint_fast64_t)temp) * inverse_class) >> 32;
+                            temp2 = (((uint64_t)temp) * inverse_class) >> 32;
                             if (partition_count + c_p_c - 1 - i < ptns_to_read)
                                 classifs[j_times_ptns_to_read + partition_count + c_p_c - 1 - i] = temp - temp2 * vr->classifications;
                             temp = temp2;
@@ -1321,17 +1320,17 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
             }
             for (i = 0; (i < c_p_c) && (partition_count < ptns_to_read); ++i) {
                 for (j_times_ptns_to_read = 0, j = 0; j < ch_used; ++j) {
-                    uint_fast16_t voffs;
+                    unsigned voffs;
 
                     if (!do_not_decode[j]) {
-                        uint_fast8_t vqclass = classifs[j_times_ptns_to_read+partition_count];
-                        int_fast16_t vqbook = vr->books[vqclass][pass];
+                        unsigned vqclass = classifs[j_times_ptns_to_read + partition_count];
+                        int vqbook  = vr->books[vqclass][pass];
 
                         if (vqbook >= 0 && vc->codebooks[vqbook].codevectors) {
-                            uint_fast16_t coffs;
-                            unsigned dim =  vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64
-                            uint_fast16_t step = dim == 1 ? vr->partition_size
-                                                          : FASTDIV(vr->partition_size, dim);
+                            unsigned coffs;
+                            unsigned dim  = vc->codebooks[vqbook].dimensions;
+                            unsigned step = dim == 1 ? vr->partition_size
+                                                     : FASTDIV(vr->partition_size, dim);
                             vorbis_codebook codebook = vc->codebooks[vqbook];
 
                             if (vr_type == 0) {
@@ -1405,9 +1404,9 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
 }
 
 static inline int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr,
-                                        uint_fast8_t ch,
+                                        unsigned ch,
                                         uint_fast8_t *do_not_decode,
-                                        float *vec, uint_fast16_t vlen)
+                                        float *vec, unsigned vlen)
 {
     if (vr->type == 2)
         return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 2);
@@ -1451,19 +1450,17 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
 {
     GetBitContext *gb = &vc->gb;
     FFTContext *mdct;
-    uint_fast8_t previous_window = vc->previous_window;
-    uint_fast8_t mode_number;
-    uint_fast8_t blockflag;
-    uint_fast16_t blocksize;
-    int_fast32_t i,j;
+    unsigned previous_window = vc->previous_window;
+    unsigned mode_number, blockflag, blocksize;
+    int i, j;
     uint_fast8_t no_residue[255];
     uint_fast8_t do_not_decode[255];
     vorbis_mapping *mapping;
     float *ch_res_ptr   = vc->channel_residues;
     float *ch_floor_ptr = vc->channel_floors;
     uint_fast8_t res_chan[255];
-    uint_fast8_t res_num = 0;
-    int_fast16_t retlen  = 0;
+    unsigned res_num = 0;
+    int retlen  = 0;
 
     if (get_bits1(gb)) {
         av_log(vc->avccontext, AV_LOG_ERROR, "Not a Vorbis I audio packet.\n");
@@ -1478,7 +1475,8 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
     vc->mode_number = mode_number;
     mapping = &vc->mappings[vc->modes[mode_number].mapping];
 
-    AV_DEBUG(" Mode number: %d , mapping: %d , blocktype %d \n", mode_number, vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag);
+    AV_DEBUG(" Mode number: %u , mapping: %d , blocktype %d\n", mode_number,
+             vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag);
 
     blockflag = vc->modes[mode_number].blockflag;
     blocksize = vc->blocksize[blockflag];
@@ -1522,7 +1520,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
 
     for (i = 0; i < mapping->submaps; ++i) {
         vorbis_residue *residue;
-        uint_fast8_t ch = 0;
+        unsigned ch = 0;
 
         for (j = 0; j < vc->audio_channels; ++j) {
             if ((mapping->submaps == 1) || (i == mapping->mux[j])) {
@@ -1567,8 +1565,8 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
 
     retlen = (blocksize + vc->blocksize[previous_window]) / 4;
     for (j = 0; j < vc->audio_channels; j++) {
-        uint_fast16_t bs0 = vc->blocksize[0];
-        uint_fast16_t bs1 = vc->blocksize[1];
+        unsigned bs0 = vc->blocksize[0];
+        unsigned bs1 = vc->blocksize[1];
         float *residue    = vc->channel_residues + res_chan[j] * blocksize / 2;
         float *saved      = vc->saved + j * bs1 / 4;
         float *ret        = vc->channel_floors + j * retlen;
@@ -1602,9 +1600,7 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
     vorbis_context *vc = avccontext->priv_data ;
     GetBitContext *gb = &(vc->gb);
     const float *channel_ptrs[255];
-    int i;
-
-    int_fast16_t len;
+    int i, len;
 
     if (!buf_size)
         return 0;
diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index 3601e47af1..3da1a60c19 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -103,7 +103,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     s->use_variable_block_len = flags2 & 0x0004;
 
     if(avctx->codec->id == CODEC_ID_WMAV2 && avctx->extradata_size >= 8){
-        if(!AV_RL16(extradata+6) && s->use_variable_block_len){
+        if(AV_RL16(extradata+4)==0xd && s->use_variable_block_len){
             av_log(avctx, AV_LOG_WARNING, "Disabling use_variable_block_len, if this fails contact the ffmpeg developers and send us the file\n");
             s->use_variable_block_len= 0; // this fixes issue1503
         }
@@ -185,15 +185,6 @@ static void wma_lsp_to_curve_init(WMACodecContext *s, int frame_len)
         s->lsp_pow_m_table2[i] = b - a;
         b = a;
     }
-#if 0
-    for(i=1;i<20;i++) {
-        float v, r1, r2;
-        v = 5.0 / i;
-        r1 = pow_m1_4(s, v);
-        r2 = pow(v,-0.25);
-        printf("%f^-0.25=%f e=%f\n", v, r1, r2 - r1);
-    }
-#endif
 }
 
 /**
diff --git a/libavcodec/wmv2dec.c b/libavcodec/wmv2dec.c
index 768b819479..3214aae42b 100644
--- a/libavcodec/wmv2dec.c
+++ b/libavcodec/wmv2dec.c
@@ -116,16 +116,6 @@ int ff_wmv2_decode_picture_header(MpegEncContext * s)
     Wmv2Context * const w= (Wmv2Context*)s;
     int code;
 
-#if 0
-{
-int i;
-for(i=0; i<s->gb.size*8; i++)
-    printf("%d", get_bits1(&s->gb));
-//    get_bits1(&s->gb);
-printf("END\n");
-return -1;
-}
-#endif
     if(s->picture_number==0)
         decode_ext_header(w);
 
@@ -316,10 +306,6 @@ static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n,
 
     if(w->per_block_abt)
         w->abt_type= decode012(&s->gb);
-#if 0
-    if(w->per_block_abt)
-        printf("B%d", w->abt_type);
-#endif
     w->abt_type_table[n]= w->abt_type;
 
     if(w->abt_type){
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index f8d456d3ea..6416e600a3 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -25,6 +25,7 @@ MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
 MMX-OBJS-$(CONFIG_MP3FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
 MMX-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += x86/mpegaudiodec_mmx.o
 MMX-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += x86/mpegaudiodec_mmx.o
+MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/png_mmx.o
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 1b17d0ec7e..985a15d2f1 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -579,28 +579,6 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
         dst[i+0] += src[i+0];
 }
 
-static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
-    x86_reg i=0;
-    __asm__ volatile(
-        "jmp 2f                         \n\t"
-        "1:                             \n\t"
-        "movq   (%2, %0), %%mm0         \n\t"
-        "movq  8(%2, %0), %%mm1         \n\t"
-        "paddb  (%3, %0), %%mm0         \n\t"
-        "paddb 8(%3, %0), %%mm1         \n\t"
-        "movq %%mm0,  (%1, %0)          \n\t"
-        "movq %%mm1, 8(%1, %0)          \n\t"
-        "add $16, %0                    \n\t"
-        "2:                             \n\t"
-        "cmp %4, %0                     \n\t"
-        " js 1b                         \n\t"
-        : "+r" (i)
-        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
-    );
-    for(; i<w; i++)
-        dst[i] = src1[i] + src2[i];
-}
-
 #if HAVE_7REGS && HAVE_TEN_OPERANDS
 static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) {
     x86_reg w2 = -w;
@@ -876,80 +854,6 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w,
     }
 }
 
-#define PAETH(cpu, abs3)\
-static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
-{\
-    x86_reg i = -bpp;\
-    x86_reg end = w-3;\
-    __asm__ volatile(\
-        "pxor      %%mm7, %%mm7 \n"\
-        "movd    (%1,%0), %%mm0 \n"\
-        "movd    (%2,%0), %%mm1 \n"\
-        "punpcklbw %%mm7, %%mm0 \n"\
-        "punpcklbw %%mm7, %%mm1 \n"\
-        "add       %4, %0 \n"\
-        "1: \n"\
-        "movq      %%mm1, %%mm2 \n"\
-        "movd    (%2,%0), %%mm1 \n"\
-        "movq      %%mm2, %%mm3 \n"\
-        "punpcklbw %%mm7, %%mm1 \n"\
-        "movq      %%mm2, %%mm4 \n"\
-        "psubw     %%mm1, %%mm3 \n"\
-        "psubw     %%mm0, %%mm4 \n"\
-        "movq      %%mm3, %%mm5 \n"\
-        "paddw     %%mm4, %%mm5 \n"\
-        abs3\
-        "movq      %%mm4, %%mm6 \n"\
-        "pminsw    %%mm5, %%mm6 \n"\
-        "pcmpgtw   %%mm6, %%mm3 \n"\
-        "pcmpgtw   %%mm5, %%mm4 \n"\
-        "movq      %%mm4, %%mm6 \n"\
-        "pand      %%mm3, %%mm4 \n"\
-        "pandn     %%mm3, %%mm6 \n"\
-        "pandn     %%mm0, %%mm3 \n"\
-        "movd    (%3,%0), %%mm0 \n"\
-        "pand      %%mm1, %%mm6 \n"\
-        "pand      %%mm4, %%mm2 \n"\
-        "punpcklbw %%mm7, %%mm0 \n"\
-        "movq      %6,    %%mm5 \n"\
-        "paddw     %%mm6, %%mm0 \n"\
-        "paddw     %%mm2, %%mm3 \n"\
-        "paddw     %%mm3, %%mm0 \n"\
-        "pand      %%mm5, %%mm0 \n"\
-        "movq      %%mm0, %%mm3 \n"\
-        "packuswb  %%mm3, %%mm3 \n"\
-        "movd      %%mm3, (%1,%0) \n"\
-        "add       %4, %0 \n"\
-        "cmp       %5, %0 \n"\
-        "jle 1b \n"\
-        :"+r"(i)\
-        :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
-         "m"(ff_pw_255)\
-        :"memory"\
-    );\
-}
-
-#define ABS3_MMX2\
-        "psubw     %%mm5, %%mm7 \n"\
-        "pmaxsw    %%mm7, %%mm5 \n"\
-        "pxor      %%mm6, %%mm6 \n"\
-        "pxor      %%mm7, %%mm7 \n"\
-        "psubw     %%mm3, %%mm6 \n"\
-        "psubw     %%mm4, %%mm7 \n"\
-        "pmaxsw    %%mm6, %%mm3 \n"\
-        "pmaxsw    %%mm7, %%mm4 \n"\
-        "pxor      %%mm7, %%mm7 \n"
-
-#define ABS3_SSSE3\
-        "pabsw     %%mm3, %%mm3 \n"\
-        "pabsw     %%mm4, %%mm4 \n"\
-        "pabsw     %%mm5, %%mm5 \n"
-
-PAETH(mmx2, ABS3_MMX2)
-#if HAVE_SSSE3
-PAETH(ssse3, ABS3_SSSE3)
-#endif
-
 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
         "paddw " #m4 ", " #m3 "           \n\t" /* x1 */\
         "movq "MANGLE(ff_pw_20)", %%mm4   \n\t" /* 20 */\
@@ -2537,7 +2441,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 #endif
 
         c->add_bytes= add_bytes_mmx;
-        c->add_bytes_l2= add_bytes_l2_mmx;
 
         if (!h264_high_depth)
         c->draw_edges = draw_edges_mmx;
@@ -2658,7 +2561,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
 #endif
 
-            c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
         } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
             c->prefetch = prefetch_3dnow;
 
@@ -2772,7 +2674,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 2, ssse3);
             H264_QPEL_FUNCS(3, 3, ssse3);
             }
-            c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
 #if HAVE_YASM
             if (!h264_high_depth) {
             c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
diff --git a/libavcodec/x86/mpegvideo_mmx_template.c b/libavcodec/x86/mpegvideo_mmx_template.c
index 0df4600a11..fb52159576 100644
--- a/libavcodec/x86/mpegvideo_mmx_template.c
+++ b/libavcodec/x86/mpegvideo_mmx_template.c
@@ -116,22 +116,11 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
             q = s->c_dc_scale;
         /* note: block[0] is assumed to be positive */
         if (!s->h263_aic) {
-#if 1
         __asm__ volatile (
                 "mul %%ecx                \n\t"
                 : "=d" (level), "=a"(dummy)
                 : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1])
         );
-#else
-        __asm__ volatile (
-                "xorl %%edx, %%edx        \n\t"
-                "divw %%cx                \n\t"
-                "movzwl %%ax, %%eax       \n\t"
-                : "=a" (level)
-                : "a" ((block[0]>>2) + q), "c" (q<<1)
-                : "%edx"
-        );
-#endif
         } else
             /* For AIC we skip quant/dequant of INTRADC */
             level = (block[0] + 4)>>3;
diff --git a/libavcodec/x86/png_mmx.c b/libavcodec/x86/png_mmx.c
new file mode 100644
index 0000000000..3b284122ff
--- /dev/null
+++ b/libavcodec/x86/png_mmx.c
@@ -0,0 +1,143 @@
+/*
+ * MMX optimized PNG utils
+ * Copyright (c) 2008 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/png.h"
+#include "dsputil_mmx.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
+{
+    x86_reg i=0;
+    __asm__ volatile(
+        "jmp 2f                         \n\t"
+        "1:                             \n\t"
+        "movq   (%2, %0), %%mm0         \n\t"
+        "movq  8(%2, %0), %%mm1         \n\t"
+        "paddb  (%3, %0), %%mm0         \n\t"
+        "paddb 8(%3, %0), %%mm1         \n\t"
+        "movq %%mm0,  (%1, %0)          \n\t"
+        "movq %%mm1, 8(%1, %0)          \n\t"
+        "add $16, %0                    \n\t"
+        "2:                             \n\t"
+        "cmp %4, %0                     \n\t"
+        " js 1b                         \n\t"
+        : "+r" (i)
+        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
+    );
+    for(; i<w; i++)
+        dst[i] = src1[i] + src2[i];
+}
+
+#define PAETH(cpu, abs3)\
+static void add_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
+{\
+    x86_reg i, end;\
+    if(bpp>4) add_paeth_prediction_##cpu(dst+bpp/2, src+bpp/2, top+bpp/2, w-bpp/2, -bpp);\
+    if(bpp<0) bpp=-bpp;\
+    i= -bpp;\
+    end = w-3;\
+    __asm__ volatile(\
+        "pxor      %%mm7, %%mm7 \n"\
+        "movd    (%1,%0), %%mm0 \n"\
+        "movd    (%2,%0), %%mm1 \n"\
+        "punpcklbw %%mm7, %%mm0 \n"\
+        "punpcklbw %%mm7, %%mm1 \n"\
+        "add       %4, %0 \n"\
+        "1: \n"\
+        "movq      %%mm1, %%mm2 \n"\
+        "movd    (%2,%0), %%mm1 \n"\
+        "movq      %%mm2, %%mm3 \n"\
+        "punpcklbw %%mm7, %%mm1 \n"\
+        "movq      %%mm2, %%mm4 \n"\
+        "psubw     %%mm1, %%mm3 \n"\
+        "psubw     %%mm0, %%mm4 \n"\
+        "movq      %%mm3, %%mm5 \n"\
+        "paddw     %%mm4, %%mm5 \n"\
+        abs3\
+        "movq      %%mm4, %%mm6 \n"\
+        "pminsw    %%mm5, %%mm6 \n"\
+        "pcmpgtw   %%mm6, %%mm3 \n"\
+        "pcmpgtw   %%mm5, %%mm4 \n"\
+        "movq      %%mm4, %%mm6 \n"\
+        "pand      %%mm3, %%mm4 \n"\
+        "pandn     %%mm3, %%mm6 \n"\
+        "pandn     %%mm0, %%mm3 \n"\
+        "movd    (%3,%0), %%mm0 \n"\
+        "pand      %%mm1, %%mm6 \n"\
+        "pand      %%mm4, %%mm2 \n"\
+        "punpcklbw %%mm7, %%mm0 \n"\
+        "paddw     %%mm6, %%mm0 \n"\
+        "paddw     %%mm2, %%mm3 \n"\
+        "paddw     %%mm3, %%mm0 \n"\
+        "pand      %6   , %%mm0 \n"\
+        "movq      %%mm0, %%mm3 \n"\
+        "packuswb  %%mm3, %%mm3 \n"\
+        "movd      %%mm3, (%1,%0) \n"\
+        "add       %4, %0 \n"\
+        "cmp       %5, %0 \n"\
+        "jle 1b \n"\
+        :"+r"(i)\
+        :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
+         "m"(ff_pw_255)\
+        :"memory"\
+    );\
+}
+
+#define ABS3_MMX2\
+        "psubw     %%mm5, %%mm7 \n"\
+        "pmaxsw    %%mm7, %%mm5 \n"\
+        "pxor      %%mm6, %%mm6 \n"\
+        "pxor      %%mm7, %%mm7 \n"\
+        "psubw     %%mm3, %%mm6 \n"\
+        "psubw     %%mm4, %%mm7 \n"\
+        "pmaxsw    %%mm6, %%mm3 \n"\
+        "pmaxsw    %%mm7, %%mm4 \n"\
+        "pxor      %%mm7, %%mm7 \n"
+
+#define ABS3_SSSE3\
+        "pabsw     %%mm3, %%mm3 \n"\
+        "pabsw     %%mm4, %%mm4 \n"\
+        "pabsw     %%mm5, %%mm5 \n"
+
+PAETH(mmx2, ABS3_MMX2)
+#if HAVE_SSSE3
+PAETH(ssse3, ABS3_SSSE3)
+#endif
+
+void ff_png_init_mmx(PNGDecContext *s)
+{
+    int mm_flags = av_get_cpu_flags();
+
+    if (mm_flags & AV_CPU_FLAG_MMX2) {
+        s->add_bytes_l2 = add_bytes_l2_mmx;
+        s->add_paeth_prediction = add_paeth_prediction_mmx2;
+#if HAVE_SSSE3
+        if (mm_flags & AV_CPU_FLAG_SSSE3)
+            s->add_paeth_prediction = add_paeth_prediction_ssse3;
+#endif
+    }
+}
diff --git a/libavcodec/xsubdec.c b/libavcodec/xsubdec.c
index a577ac8f28..d7babe4cd2 100644
--- a/libavcodec/xsubdec.c
+++ b/libavcodec/xsubdec.c
@@ -54,6 +54,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     int w, h, x, y, rlelen, i;
     int64_t packet_time = 0;
     GetBitContext gb;
+    int has_alpha = avctx->codec_tag == MKTAG('D','X','S','A');
 
     // check that at least header fits
     if (buf_size < 27 + 7 * 2 + 4 * 3) {
@@ -100,8 +101,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     for (i = 0; i < sub->rects[0]->nb_colors; i++)
         ((uint32_t*)sub->rects[0]->pict.data[1])[i] = bytestream_get_be24(&buf);
     // make all except background (first entry) non-transparent
-    for (i = 1; i < sub->rects[0]->nb_colors; i++)
-        ((uint32_t*)sub->rects[0]->pict.data[1])[i] |= 0xff000000;
+    for (i = 0; i < sub->rects[0]->nb_colors; i++)
+        ((uint32_t*)sub->rects[0]->pict.data[1])[i] |= (has_alpha ? *buf++ : (i ? 0xff : 0)) << 24;
 
     // process RLE-compressed data
     rlelen = FFMIN(rlelen, buf_end - buf);
author	Michael Niedermayer <michaelni@gmx.at>	2011-05-02 04:18:04 +0200
committer	Michael Niedermayer <michaelni@gmx.at>	2011-05-02 04:18:04 +0200
commit	d4b98d475f8abf9f05ebe91b3fce341aa4b902ee (patch)
tree	bc452a41fbc304d19ee058da006e9760cb160d6a /libavcodec
parent	8d8962ca3e95b1ce86ab505498e7cd3ec89fa996 (diff)
parent	1a9f9f81b1244b952126bb65bc741b04d3534f81 (diff)
download	ffmpeg-d4b98d475f8abf9f05ebe91b3fce341aa4b902ee.tar.gz