diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-01-17 01:40:45 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-01-17 02:37:30 +0100 |
commit | 67f5650a78de2567c58dbd7545434cc6d3ef9b7e (patch) | |
tree | 34b08ed769cd7a1f071bf9ff4eca1348481c0bf1 /libavcodec | |
parent | 905c4dc2b0d564e1b9b6bc6eeca0b8915b81cd8c (diff) | |
parent | 9e12002f114d7e0b0ef69519518cdc0391e5e198 (diff) | |
download | ffmpeg-67f5650a78de2567c58dbd7545434cc6d3ef9b7e.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
rv34: add NEON rv34_idct_add
rv34: 1-pass inter MB reconstruction
add SMJPEG muxer
avformat: split out common SMJPEG code
pictordec: Use bytestream2 functions
avconv: use avcodec_encode_audio2()
pcmenc: use AVCodec.encode2()
avcodec: bump minor version and add APIChanges for the new audio encoding API
avcodec: Add avcodec_encode_audio2() as replacement for avcodec_encode_audio()
avcodec: add a public function, avcodec_fill_audio_frame().
rv34: Intra 16x16 handling
rv34: Inter/intra MB code split
Conflicts:
Changelog
libavcodec/avcodec.h
libavcodec/pictordec.c
libavcodec/utils.c
libavcodec/version.h
libavcodec/x86/rv34dsp.asm
libavformat/version.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/arm/rv34dsp_init_neon.c | 14 | ||||
-rw-r--r-- | libavcodec/arm/rv34dsp_neon.S | 106 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 92 | ||||
-rw-r--r-- | libavcodec/internal.h | 25 | ||||
-rw-r--r-- | libavcodec/pcm.c | 28 | ||||
-rw-r--r-- | libavcodec/pictordec.c | 103 | ||||
-rw-r--r-- | libavcodec/rv34.c | 484 | ||||
-rw-r--r-- | libavcodec/rv34dsp.c | 61 | ||||
-rw-r--r-- | libavcodec/rv34dsp.h | 10 | ||||
-rw-r--r-- | libavcodec/utils.c | 343 | ||||
-rw-r--r-- | libavcodec/version.h | 5 | ||||
-rw-r--r-- | libavcodec/x86/rv34dsp.asm | 83 | ||||
-rw-r--r-- | libavcodec/x86/rv34dsp_init.c | 13 |
13 files changed, 987 insertions, 380 deletions
diff --git a/libavcodec/arm/rv34dsp_init_neon.c b/libavcodec/arm/rv34dsp_init_neon.c index 16bda46658..744818cee3 100644 --- a/libavcodec/arm/rv34dsp_init_neon.c +++ b/libavcodec/arm/rv34dsp_init_neon.c @@ -23,16 +23,18 @@ #include "libavcodec/avcodec.h" #include "libavcodec/rv34dsp.h" -void ff_rv34_inv_transform_neon(DCTELEM *block); void ff_rv34_inv_transform_noround_neon(DCTELEM *block); -void ff_rv34_inv_transform_dc_neon(DCTELEM *block); void ff_rv34_inv_transform_noround_dc_neon(DCTELEM *block); +void ff_rv34_idct_add_neon(uint8_t *dst, int stride, DCTELEM *block); +void ff_rv34_idct_dc_add_neon(uint8_t *dst, int stride, int dc); + void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp) { - c->rv34_inv_transform_tab[0] = ff_rv34_inv_transform_neon; - c->rv34_inv_transform_tab[1] = ff_rv34_inv_transform_noround_neon; - c->rv34_inv_transform_dc_tab[0] = ff_rv34_inv_transform_dc_neon; - c->rv34_inv_transform_dc_tab[1] = ff_rv34_inv_transform_noround_dc_neon; + c->rv34_inv_transform = ff_rv34_inv_transform_noround_neon; + c->rv34_inv_transform_dc = ff_rv34_inv_transform_noround_dc_neon; + + c->rv34_idct_add = ff_rv34_idct_add_neon; + c->rv34_idct_dc_add = ff_rv34_idct_dc_add_neon; } diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S index 1e8d4b49a1..15a015deef 100644 --- a/libavcodec/arm/rv34dsp_neon.S +++ b/libavcodec/arm/rv34dsp_neon.S @@ -19,13 +19,10 @@ */ #include "asm.S" +#include "neon.S" -.macro rv34_inv_transform - mov r1, #16 - vld1.16 {d28}, [r0,:64], r1 @ block[i+8*0] - vld1.16 {d29}, [r0,:64], r1 @ block[i+8*1] - vld1.16 {d30}, [r0,:64], r1 @ block[i+8*2] - vld1.16 {d31}, [r0,:64], r1 @ block[i+8*3] +.macro rv34_inv_transform r0 + vld1.16 {q14-q15}, [\r0,:128] vmov.s16 d0, #13 vshll.s16 q12, d29, #3 vshll.s16 q13, d29, #4 @@ -35,12 +32,12 @@ vmlal.s16 q10, d30, d0 vmull.s16 q11, d28, d0 vmlsl.s16 q11, d30, d0 - vsubw.s16 q12, q12, d29 @ z2 = block[i+8*1]*7 - vaddw.s16 q13, q13, d29 @ z3 = block[i+8*1]*17 + vsubw.s16 q12, q12, d29 @ z2 = block[i+4*1]*7 + vaddw.s16 q13, q13, d29 @ z3 = block[i+4*1]*17 vsubw.s16 q9, q9, d31 vaddw.s16 q1, q1, d31 - vadd.s32 q13, q13, q9 @ z3 = 17*block[i+8*1] + 7*block[i+8*3] - vsub.s32 q12, q12, q1 @ z2 = 7*block[i+8*1] - 17*block[i+8*3] + vadd.s32 q13, q13, q9 @ z3 = 17*block[i+4*1] + 7*block[i+4*3] + vsub.s32 q12, q12, q1 @ z2 = 7*block[i+4*1] - 17*block[i+4*3] vadd.s32 q1, q10, q13 @ z0 + z3 vadd.s32 q2, q11, q12 @ z1 + z2 vsub.s32 q8, q10, q13 @ z0 - z3 @@ -70,25 +67,39 @@ vsub.s32 q15, q14, q9 @ z0 - z3 .endm -/* void ff_rv34_inv_transform_neon(DCTELEM *block); */ -function ff_rv34_inv_transform_neon, export=1 - mov r2, r0 - rv34_inv_transform - vrshrn.s32 d1, q2, #10 @ (z1 + z2) >> 10 - vrshrn.s32 d0, q1, #10 @ (z0 + z3) >> 10 - vrshrn.s32 d2, q3, #10 @ (z1 - z2) >> 10 - vrshrn.s32 d3, q15, #10 @ (z0 - z3) >> 10 - vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r2,:64], r1 - vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r2,:64], r1 - vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r2,:64], r1 - vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1 +/* void rv34_idct_add_c(uint8_t *dst, int stride, DCTELEM *block) */ +function ff_rv34_idct_add_neon, export=1 + mov r3, r0 + rv34_inv_transform r2 + vmov.i16 q12, #0 + vrshrn.s32 d16, q1, #10 @ (z0 + z3) >> 10 + vrshrn.s32 d17, q2, #10 @ (z1 + z2) >> 10 + vrshrn.s32 d18, q3, #10 @ (z1 - z2) >> 10 + vrshrn.s32 d19, q15, #10 @ (z0 - z3) >> 10 + vld1.32 {d28[]}, [r0,:32], r1 + vld1.32 {d29[]}, [r0,:32], r1 + vtrn.32 q8, q9 + vld1.32 {d28[1]}, [r0,:32], r1 + vld1.32 {d29[1]}, [r0,:32], r1 + vst1.16 {q12}, [r2,:128]! @ memset(block, 0, 16) + vst1.16 {q12}, [r2,:128] @ memset(block+16, 0, 16) + vtrn.16 d16, d17 + vtrn.32 d28, d29 + vtrn.16 d18, d19 + vaddw.u8 q0, q8, d28 + vaddw.u8 q1, q9, d29 + vqmovun.s16 d28, q0 + vqmovun.s16 d29, q1 + vst1.32 {d28[0]}, [r3,:32], r1 + vst1.32 {d28[1]}, [r3,:32], r1 + vst1.32 {d29[0]}, [r3,:32], r1 + vst1.32 {d29[1]}, [r3,:32], r1 bx lr endfunc /* void rv34_inv_transform_noround_neon(DCTELEM *block); */ function ff_rv34_inv_transform_noround_neon, export=1 - mov r2, r0 - rv34_inv_transform + rv34_inv_transform r0 vshl.s32 q11, q2, #1 vshl.s32 q10, q1, #1 vshl.s32 q12, q3, #1 @@ -101,24 +112,33 @@ function ff_rv34_inv_transform_noround_neon, export=1 vshrn.s32 d1, q11, #11 @ (z1 + z2)*3 >> 11 vshrn.s32 d2, q12, #11 @ (z1 - z2)*3 >> 11 vshrn.s32 d3, q13, #11 @ (z0 - z3)*3 >> 11 - vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r2,:64], r1 - vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r2,:64], r1 - vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r2,:64], r1 - vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1 + vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0,:64]! + vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0,:64]! + vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0,:64]! + vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0,:64]! bx lr endfunc -/* void rv34_inv_transform_dc_c(DCTELEM *block) */ -function ff_rv34_inv_transform_dc_neon, export=1 - vld1.16 {d28[]}, [r0,:16] @ block[0] - vmov.i16 d4, #169 - mov r1, #16 - vmull.s16 q3, d28, d4 - vrshrn.s32 d0, q3, #10 - vst1.16 {d0}, [r0,:64], r1 - vst1.16 {d0}, [r0,:64], r1 - vst1.16 {d0}, [r0,:64], r1 - vst1.16 {d0}, [r0,:64], r1 +/* void ff_rv34_idct_dc_add_neon(uint8_t *dst, int stride, int dc) */ +function ff_rv34_idct_dc_add_neon, export=1 + mov r3, r0 + vld1.32 {d28[]}, [r0,:32], r1 + vld1.32 {d29[]}, [r0,:32], r1 + vdup.16 d0, r2 + vmov.s16 d1, #169 + vld1.32 {d28[1]}, [r0,:32], r1 + vmull.s16 q1, d0, d1 @ dc * 13 * 13 + vld1.32 {d29[1]}, [r0,:32], r1 + vrshrn.s32 d0, q1, #10 @ (dc * 13 * 13 + 0x200) >> 10 + vmov d1, d0 + vaddw.u8 q2, q0, d28 + vaddw.u8 q3, q0, d29 + vqmovun.s16 d28, q2 + vqmovun.s16 d29, q3 + vst1.32 {d28[0]}, [r3,:32], r1 + vst1.32 {d29[0]}, [r3,:32], r1 + vst1.32 {d28[1]}, [r3,:32], r1 + vst1.32 {d29[1]}, [r3,:32], r1 bx lr endfunc @@ -127,12 +147,10 @@ function ff_rv34_inv_transform_noround_dc_neon, export=1 vld1.16 {d28[]}, [r0,:16] @ block[0] vmov.i16 d4, #251 vorr.s16 d4, #256 @ 13^2 * 3 - mov r1, #16 vmull.s16 q3, d28, d4 vshrn.s32 d0, q3, #11 - vst1.64 {d0}, [r0,:64], r1 - vst1.64 {d0}, [r0,:64], r1 - vst1.64 {d0}, [r0,:64], r1 - vst1.64 {d0}, [r0,:64], r1 + vmov.i16 d1, d0 + vst1.64 {q0}, [r0,:128]! + vst1.64 {q0}, [r0,:128]! bx lr endfunc diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 4e55e0e12b..e690c81e12 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -761,6 +761,11 @@ typedef struct RcOverride{ * Encoders: * The encoder needs to be fed with NULL data at the end of encoding until the * encoder no longer returns data. + * + * NOTE: For encoders implementing the AVCodec.encode2() function, setting this + * flag also means that the encoder must set the pts and duration for + * each output packet. If this flag is not set, the pts and duration will + * be determined by libavcodec from the input frame. */ #define CODEC_CAP_DELAY 0x0020 /** @@ -816,6 +821,10 @@ typedef struct RcOverride{ */ #define CODEC_CAP_AUTO_THREADS 0x8000 /** + * Audio encoder supports receiving a different number of samples in each call. + */ +#define CODEC_CAP_VARIABLE_FRAME_SIZE 0x10000 +/** * Codec is lossless. */ #define CODEC_CAP_LOSSLESS 0x80000000 @@ -3314,6 +3323,19 @@ typedef struct AVCodec { * Initialize codec static data, called from avcodec_register(). */ void (*init_static_data)(struct AVCodec *codec); + + /** + * Encode data to an AVPacket. + * + * @param avctx codec context + * @param avpkt output AVPacket (may contain a user-provided buffer) + * @param[in] frame AVFrame containing the raw data to be encoded + * @param[out] got_packet_ptr encoder sets to 0 or 1 to indicate that a + * non-empty packet was returned in avpkt. + * @return 0 on success, negative error code on failure + */ + int (*encode2)(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, + int *got_packet_ptr); } AVCodec; /** @@ -4331,9 +4353,12 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, */ void avsubtitle_free(AVSubtitle *sub); +#if FF_API_OLD_ENCODE_AUDIO /** * Encode an audio frame from samples into buf. * + * @deprecated Use avcodec_encode_audio2 instead. + * * @note The output buffer should be at least FF_MIN_BUFFER_SIZE bytes large. * However, for codecs with avctx->frame_size equal to 0 (e.g. PCM) the user * will know how much space is needed because it depends on the value passed @@ -4353,8 +4378,71 @@ void avsubtitle_free(AVSubtitle *sub); * @return On error a negative value is returned, on success zero or the number * of bytes used to encode the data read from the input buffer. */ -int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size, - const short *samples); +int attribute_deprecated avcodec_encode_audio(AVCodecContext *avctx, + uint8_t *buf, int buf_size, + const short *samples); +#endif + +/** + * Encode a frame of audio. + * + * Takes input samples from frame and writes the next output packet, if + * available, to avpkt. The output packet does not necessarily contain data for + * the most recent frame, as encoders can delay, split, and combine input frames + * internally as needed. + * + * @param avctx codec context + * @param avpkt output AVPacket. + * The user can supply an output buffer by setting + * avpkt->data and avpkt->size prior to calling the + * function, but if the size of the user-provided data is not + * large enough, encoding will fail. All other AVPacket fields + * will be reset by the encoder using av_init_packet(). If + * avpkt->data is NULL, the encoder will allocate it. + * The encoder will set avpkt->size to the size of the + * output packet. + * @param[in] frame AVFrame containing the raw audio data to be encoded. + * May be NULL when flushing an encoder that has the + * CODEC_CAP_DELAY capability set. + * There are 2 codec capabilities that affect the allowed + * values of frame->nb_samples. + * If CODEC_CAP_SMALL_LAST_FRAME is set, then only the final + * frame may be smaller than avctx->frame_size, and all other + * frames must be equal to avctx->frame_size. + * If CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame + * can have any number of samples. + * If neither is set, frame->nb_samples must be equal to + * avctx->frame_size for all frames. + * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the + * output packet is non-empty, and to 0 if it is + * empty. If the function returns an error, the + * packet can be assumed to be invalid, and the + * value of got_packet_ptr is undefined and should + * not be used. + * @return 0 on success, negative error code on failure + */ +int avcodec_encode_audio2(AVCodecContext *avctx, AVPacket *avpkt, + const AVFrame *frame, int *got_packet_ptr); + +/** + * Fill audio frame data and linesize. + * AVFrame extended_data channel pointers are allocated if necessary for + * planar audio. + * + * @param frame the AVFrame + * frame->nb_samples must be set prior to calling the + * function. This function fills in frame->data, + * frame->extended_data, frame->linesize[0]. + * @param nb_channels channel count + * @param sample_fmt sample format + * @param buf buffer to use for frame data + * @param buf_size size of buffer + * @param align plane size sample alignment + * @return 0 on success, negative error code on failure + */ +int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels, + enum AVSampleFormat sample_fmt, const uint8_t *buf, + int buf_size, int align); /** * Encode a video frame from pict into buf. diff --git a/libavcodec/internal.h b/libavcodec/internal.h index e6270f81bf..72a89441c2 100644 --- a/libavcodec/internal.h +++ b/libavcodec/internal.h @@ -61,6 +61,14 @@ typedef struct AVCodecInternal { * should be freed from the original context only. */ int is_copy; + +#if FF_API_OLD_DECODE_AUDIO + /** + * Internal sample count used by avcodec_encode_audio() to fabricate pts. + * Can be removed along with avcodec_encode_audio(). + */ + int sample_count; +#endif } AVCodecInternal; struct AVCodecDefault { @@ -111,4 +119,21 @@ int avpriv_unlock_avformat(void); */ #define FF_MAX_EXTRADATA_SIZE ((1 << 28) - FF_INPUT_BUFFER_PADDING_SIZE) +/** + * Check AVPacket size and/or allocate data. + * + * Encoders supporting AVCodec.encode2() can use this as a convenience to + * ensure the output packet data is large enough, whether provided by the user + * or allocated in this function. + * + * @param avpkt the AVPacket + * If avpkt->data is already set, avpkt->size is checked + * to ensure it is large enough. + * If avpkt->data is NULL, a new buffer is allocated. + * All other AVPacket fields will be reset with av_init_packet(). + * @param size the minimum required packet size + * @return 0 on success, negative error code on failure + */ +int ff_alloc_packet(AVPacket *avpkt, int size); + #endif /* AVCODEC_INTERNAL_H */ diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c index 3609c3b0d9..650003793c 100644 --- a/libavcodec/pcm.c +++ b/libavcodec/pcm.c @@ -27,6 +27,7 @@ #include "avcodec.h" #include "libavutil/common.h" /* for av_reverse */ #include "bytestream.h" +#include "internal.h" #include "pcm_tablegen.h" #define MAX_CHANNELS 64 @@ -77,10 +78,10 @@ static av_cold int pcm_encode_close(AVCodecContext *avctx) bytestream_put_##endian(&dst, v); \ } -static int pcm_encode_frame(AVCodecContext *avctx, - unsigned char *frame, int buf_size, void *data) +static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, + const AVFrame *frame, int *got_packet_ptr) { - int n, sample_size, v; + int n, sample_size, v, ret; const short *samples; unsigned char *dst; const uint8_t *srcu8; @@ -91,9 +92,14 @@ static int pcm_encode_frame(AVCodecContext *avctx, const uint32_t *samples_uint32_t; sample_size = av_get_bits_per_sample(avctx->codec->id)/8; - n = buf_size / sample_size; - samples = data; - dst = frame; + n = frame->nb_samples * avctx->channels; + samples = (const short *)frame->data[0]; + + if ((ret = ff_alloc_packet(avpkt, n * sample_size))) { + av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n"); + return ret; + } + dst = avpkt->data; switch(avctx->codec->id) { case CODEC_ID_PCM_U32LE: @@ -130,7 +136,7 @@ static int pcm_encode_frame(AVCodecContext *avctx, ENCODE(uint16_t, be16, samples, dst, n, 0, 0x8000) break; case CODEC_ID_PCM_S8: - srcu8= data; + srcu8 = frame->data[0]; for(;n>0;n--) { v = *srcu8++; *dst++ = v - 128; @@ -186,9 +192,10 @@ static int pcm_encode_frame(AVCodecContext *avctx, default: return -1; } - //avctx->frame_size = (dst - frame) / (sample_size * avctx->channels); - return dst - frame; + avpkt->size = frame->nb_samples * avctx->channels * sample_size; + *got_packet_ptr = 1; + return 0; } typedef struct PCMDecode { @@ -474,8 +481,9 @@ AVCodec ff_ ## name_ ## _encoder = { \ .type = AVMEDIA_TYPE_AUDIO, \ .id = id_, \ .init = pcm_encode_init, \ - .encode = pcm_encode_frame, \ + .encode2 = pcm_encode_frame, \ .close = pcm_encode_close, \ + .capabilities = CODEC_CAP_VARIABLE_FRAME_SIZE, \ .sample_fmts = (const enum AVSampleFormat[]){sample_fmt_,AV_SAMPLE_FMT_NONE}, \ .long_name = NULL_IF_CONFIG_SMALL(long_name_), \ } diff --git a/libavcodec/pictordec.c b/libavcodec/pictordec.c index b3b5f7ef4f..d788e6474c 100644 --- a/libavcodec/pictordec.c +++ b/libavcodec/pictordec.c @@ -33,6 +33,7 @@ typedef struct PicContext { AVFrame frame; int width, height; int nb_planes; + GetByteContext g; } PicContext; static void picmemset_8bpp(PicContext *s, int value, int run, int *x, int *y) @@ -55,7 +56,8 @@ static void picmemset_8bpp(PicContext *s, int value, int run, int *x, int *y) } } -static void picmemset(PicContext *s, int value, int run, int *x, int *y, int *plane, int bits_per_plane) +static void picmemset(PicContext *s, int value, int run, + int *x, int *y, int *plane, int bits_per_plane) { uint8_t *d; int shift = *plane * bits_per_plane; @@ -107,34 +109,35 @@ static int decode_frame(AVCodecContext *avctx, AVPacket *avpkt) { PicContext *s = avctx->priv_data; - int buf_size = avpkt->size; - const uint8_t *buf = avpkt->data; - const uint8_t *buf_end = avpkt->data + buf_size; uint32_t *palette; - int bits_per_plane, bpp, etype, esize, npal; - int i, x, y, plane; + int bits_per_plane, bpp, etype, esize, npal, pos_after_pal; + int i, x, y, plane, tmp; - if (buf_size < 11) + bytestream2_init(&s->g, avpkt->data, avpkt->size); + + if (bytestream2_get_bytes_left(&s->g) < 11) return AVERROR_INVALIDDATA; - if (bytestream_get_le16(&buf) != 0x1234) + if (bytestream2_get_le16u(&s->g) != 0x1234) return AVERROR_INVALIDDATA; - s->width = bytestream_get_le16(&buf); - s->height = bytestream_get_le16(&buf); - buf += 4; - bits_per_plane = *buf & 0xF; - s->nb_planes = (*buf++ >> 4) + 1; - bpp = s->nb_planes ? bits_per_plane*s->nb_planes : bits_per_plane; + + s->width = bytestream2_get_le16u(&s->g); + s->height = bytestream2_get_le16u(&s->g); + bytestream2_skip(&s->g, 4); + tmp = bytestream2_get_byteu(&s->g); + bits_per_plane = tmp & 0xF; + s->nb_planes = (tmp >> 4) + 1; + bpp = bits_per_plane * s->nb_planes; if (bits_per_plane > 8 || bpp < 1 || bpp > 32) { av_log_ask_for_sample(avctx, "unsupported bit depth\n"); return AVERROR_INVALIDDATA; } - if (*buf == 0xFF || bpp == 8) { - buf += 2; - etype = bytestream_get_le16(&buf); - esize = bytestream_get_le16(&buf); - if (buf_end - buf < esize) + if (bytestream2_peek_byte(&s->g) == 0xFF || bpp == 8) { + bytestream2_skip(&s->g, 2); + etype = bytestream2_get_le16(&s->g); + esize = bytestream2_get_le16(&s->g); + if (bytestream2_get_bytes_left(&s->g) < esize) return AVERROR_INVALIDDATA; } else { etype = -1; @@ -159,25 +162,30 @@ static int decode_frame(AVCodecContext *avctx, s->frame.pict_type = AV_PICTURE_TYPE_I; s->frame.palette_has_changed = 1; + pos_after_pal = bytestream2_tell(&s->g) + esize; palette = (uint32_t*)s->frame.data[1]; - if (etype == 1 && esize > 1 && *buf < 6) { - int idx = *buf; + if (etype == 1 && esize > 1 && bytestream2_peek_byte(&s->g) < 6) { + int idx = bytestream2_get_byte(&s->g); npal = 4; for (i = 0; i < npal; i++) palette[i] = ff_cga_palette[ cga_mode45_index[idx][i] ]; } else if (etype == 2) { npal = FFMIN(esize, 16); - for (i = 0; i < npal; i++) - palette[i] = ff_cga_palette[ FFMIN(buf[i], 16)]; + for (i = 0; i < npal; i++) { + int pal_idx = bytestream2_get_byte(&s->g); + palette[i] = ff_cga_palette[FFMIN(pal_idx, 16)]; + } } else if (etype == 3) { npal = FFMIN(esize, 16); - for (i = 0; i < npal; i++) - palette[i] = ff_ega_palette[ FFMIN(buf[i], 63)]; + for (i = 0; i < npal; i++) { + int pal_idx = bytestream2_get_byte(&s->g); + palette[i] = ff_ega_palette[FFMIN(pal_idx, 63)]; + } } else if (etype == 4 || etype == 5) { npal = FFMIN(esize / 3, 256); for (i = 0; i < npal; i++) { - palette[i] = AV_RB24(buf + i*3) << 2; - palette[i] |= 0xFF << 24 | palette[i] >> 6 & 0x30303; + palette[i] = bytestream2_get_be24(&s->g) << 2; + palette[i] |= 0xFFU << 24 | palette[i] >> 6 & 0x30303; } } else { if (bpp == 1) { @@ -195,29 +203,34 @@ static int decode_frame(AVCodecContext *avctx, } // fill remaining palette entries memset(palette + npal, 0, AVPALETTE_SIZE - npal * 4); - buf += esize; - + // skip remaining palette bytes + bytestream2_seek(&s->g, pos_after_pal, SEEK_SET); y = s->height - 1; - if (bytestream_get_le16(&buf)) { + if (bytestream2_get_le16(&s->g)) { x = 0; plane = 0; - while (y >= 0 && buf_end - buf >= 6) { - const uint8_t *buf_pend = buf + FFMIN(AV_RL16(buf), buf_end - buf); - //ignore uncompressed block size reported at buf[2] - int marker = buf[4]; - buf += 5; + while (y >= 0 && bytestream2_get_bytes_left(&s->g) >= 6) { + int stop_size, marker, t1, t2; + + t1 = bytestream2_get_bytes_left(&s->g); + t2 = bytestream2_get_le16(&s->g); + stop_size = t1 - FFMIN(t1, t2); + // ignore uncompressed block size + bytestream2_skip(&s->g, 2); + marker = bytestream2_get_byte(&s->g); - while (plane < s->nb_planes && y >= 0 && buf_pend - buf >= 1) { + while (plane < s->nb_planes && y >= 0 && + bytestream2_get_bytes_left(&s->g) > stop_size) { int run = 1; - int val = *buf++; + int val = bytestream2_get_byte(&s->g); if (val == marker) { - run = *buf++; + run = bytestream2_get_byte(&s->g); if (run == 0) - run = bytestream_get_le16(&buf); - val = *buf++; + run = bytestream2_get_le16(&s->g); + val = bytestream2_get_byte(&s->g); } - if (buf > buf_end) + if (!bytestream2_get_bytes_left(&s->g)) break; if (bits_per_plane == 8) { @@ -228,16 +241,16 @@ static int decode_frame(AVCodecContext *avctx, } } } else { - while (y >= 0 && buf < buf_end) { - memcpy(s->frame.data[0] + y * s->frame.linesize[0], buf, FFMIN(avctx->width, buf_end - buf)); - buf += avctx->width; + while (y >= 0 && bytestream2_get_bytes_left(&s->g) > 0) { + memcpy(s->frame.data[0] + y * s->frame.linesize[0], s->g.buffer, FFMIN(avctx->width, bytestream2_get_bytes_left(&s->g))); + bytestream2_skip(&s->g, avctx->width); y--; } } *data_size = sizeof(AVFrame); *(AVFrame*)data = s->frame; - return buf_size; + return avpkt->size; } static av_cold int decode_end(AVCodecContext *avctx) diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index 2be9b3cd38..e09d5dcf14 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -240,15 +240,15 @@ static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2, { int flags = modulo_three_table[code]; - decode_coeff( dst+0, (flags >> 6) , 3, gb, vlc, q); + decode_coeff( dst+0*4+0, (flags >> 6) , 3, gb, vlc, q); if(is_block2){ - decode_coeff(dst+8, (flags >> 4) & 3, 2, gb, vlc, q); - decode_coeff(dst+1, (flags >> 2) & 3, 2, gb, vlc, q); + decode_coeff(dst+1*4+0, (flags >> 4) & 3, 2, gb, vlc, q); + decode_coeff(dst+0*4+1, (flags >> 2) & 3, 2, gb, vlc, q); }else{ - decode_coeff(dst+1, (flags >> 4) & 3, 2, gb, vlc, q); - decode_coeff(dst+8, (flags >> 2) & 3, 2, gb, vlc, q); + decode_coeff(dst+0*4+1, (flags >> 4) & 3, 2, gb, vlc, q); + decode_coeff(dst+1*4+0, (flags >> 2) & 3, 2, gb, vlc, q); } - decode_coeff( dst+9, (flags >> 0) & 3, 2, gb, vlc, q); + decode_coeff( dst+1*4+1, (flags >> 0) & 3, 2, gb, vlc, q); } /** @@ -265,15 +265,15 @@ static inline void decode_subblock3(DCTELEM *dst, int code, const int is_block2, { int flags = modulo_three_table[code]; - decode_coeff( dst+0, (flags >> 6) , 3, gb, vlc, q_dc); + decode_coeff( dst+0*4+0, (flags >> 6) , 3, gb, vlc, q_dc); if(is_block2){ - decode_coeff(dst+8, (flags >> 4) & 3, 2, gb, vlc, q_ac1); - decode_coeff(dst+1, (flags >> 2) & 3, 2, gb, vlc, q_ac1); + decode_coeff(dst+1*4+0, (flags >> 4) & 3, 2, gb, vlc, q_ac1); + decode_coeff(dst+0*4+1, (flags >> 2) & 3, 2, gb, vlc, q_ac1); }else{ - decode_coeff(dst+1, (flags >> 4) & 3, 2, gb, vlc, q_ac1); - decode_coeff(dst+8, (flags >> 2) & 3, 2, gb, vlc, q_ac1); + decode_coeff(dst+0*4+1, (flags >> 4) & 3, 2, gb, vlc, q_ac1); + decode_coeff(dst+1*4+0, (flags >> 2) & 3, 2, gb, vlc, q_ac1); } - decode_coeff( dst+9, (flags >> 0) & 3, 2, gb, vlc, q_ac2); + decode_coeff( dst+1*4+1, (flags >> 0) & 3, 2, gb, vlc, q_ac2); } /** @@ -308,15 +308,15 @@ static inline int rv34_decode_block(DCTELEM *dst, GetBitContext *gb, RV34VLC *rv if(pattern & 4){ code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2); - decode_subblock(dst + 2, code, 0, gb, &rvlc->coefficient, q_ac2); + decode_subblock(dst + 4*0+2, code, 0, gb, &rvlc->coefficient, q_ac2); } if(pattern & 2){ // Looks like coefficients 1 and 2 are swapped for this block code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2); - decode_subblock(dst + 8*2, code, 1, gb, &rvlc->coefficient, q_ac2); + decode_subblock(dst + 4*2+0, code, 1, gb, &rvlc->coefficient, q_ac2); } if(pattern & 1){ code = get_vlc2(gb, rvlc->third_pattern[sc].table, 9, 2); - decode_subblock(dst + 8*2+2, code, 0, gb, &rvlc->coefficient, q_ac2); + decode_subblock(dst + 4*2+2, code, 0, gb, &rvlc->coefficient, q_ac2); } return has_ac || pattern; } @@ -351,44 +351,70 @@ static inline RV34VLC* choose_vlc_set(int quant, int mod, int type) } /** - * Decode macroblock header and return CBP in case of success, -1 otherwise. + * Decode intra macroblock header and return CBP in case of success, -1 otherwise. */ -static int rv34_decode_mb_header(RV34DecContext *r, int8_t *intra_types) +static int rv34_decode_intra_mb_header(RV34DecContext *r, int8_t *intra_types) { MpegEncContext *s = &r->s; GetBitContext *gb = &s->gb; int mb_pos = s->mb_x + s->mb_y * s->mb_stride; - int i, t; + int t; - if(!r->si.type){ - r->is16 = get_bits1(gb); - if(!r->is16 && !r->rv30){ + r->is16 = get_bits1(gb); + if(r->is16){ + s->current_picture_ptr->f.mb_type[mb_pos] = MB_TYPE_INTRA16x16; + r->block_type = RV34_MB_TYPE_INTRA16x16; + t = get_bits(gb, 2); + fill_rectangle(intra_types, 4, 4, r->intra_types_stride, t, sizeof(intra_types[0])); + r->luma_vlc = 2; + }else{ + if(!r->rv30){ if(!get_bits1(gb)) av_log(s->avctx, AV_LOG_ERROR, "Need DQUANT\n"); } - s->current_picture_ptr->f.mb_type[mb_pos] = r->is16 ? MB_TYPE_INTRA16x16 : MB_TYPE_INTRA; - r->block_type = r->is16 ? RV34_MB_TYPE_INTRA16x16 : RV34_MB_TYPE_INTRA; - }else{ - r->block_type = r->decode_mb_info(r); - if(r->block_type == -1) + s->current_picture_ptr->f.mb_type[mb_pos] = MB_TYPE_INTRA; + r->block_type = RV34_MB_TYPE_INTRA; + if(r->decode_intra_types(r, gb, intra_types) < 0) return -1; - s->current_picture_ptr->f.mb_type[mb_pos] = rv34_mb_type_to_lavc[r->block_type]; - r->mb_type[mb_pos] = r->block_type; - if(r->block_type == RV34_MB_SKIP){ - if(s->pict_type == AV_PICTURE_TYPE_P) - r->mb_type[mb_pos] = RV34_MB_P_16x16; - if(s->pict_type == AV_PICTURE_TYPE_B) - r->mb_type[mb_pos] = RV34_MB_B_DIRECT; - } - r->is16 = !!IS_INTRA16x16(s->current_picture_ptr->f.mb_type[mb_pos]); - rv34_decode_mv(r, r->block_type); - if(r->block_type == RV34_MB_SKIP){ - fill_rectangle(intra_types, 4, 4, r->intra_types_stride, 0, sizeof(intra_types[0])); - return 0; - } - r->chroma_vlc = 1; - r->luma_vlc = 0; + r->luma_vlc = 1; + } + + r->chroma_vlc = 0; + r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 0); + + return rv34_decode_cbp(gb, r->cur_vlcs, r->is16); +} + +/** + * Decode inter macroblock header and return CBP in case of success, -1 otherwise. + */ +static int rv34_decode_inter_mb_header(RV34DecContext *r, int8_t *intra_types) +{ + MpegEncContext *s = &r->s; + GetBitContext *gb = &s->gb; + int mb_pos = s->mb_x + s->mb_y * s->mb_stride; + int i, t; + + r->block_type = r->decode_mb_info(r); + if(r->block_type == -1) + return -1; + s->current_picture_ptr->f.mb_type[mb_pos] = rv34_mb_type_to_lavc[r->block_type]; + r->mb_type[mb_pos] = r->block_type; + if(r->block_type == RV34_MB_SKIP){ + if(s->pict_type == AV_PICTURE_TYPE_P) + r->mb_type[mb_pos] = RV34_MB_P_16x16; + if(s->pict_type == AV_PICTURE_TYPE_B) + r->mb_type[mb_pos] = RV34_MB_B_DIRECT; } + r->is16 = !!IS_INTRA16x16(s->current_picture_ptr->f.mb_type[mb_pos]); + rv34_decode_mv(r, r->block_type); + if(r->block_type == RV34_MB_SKIP){ + fill_rectangle(intra_types, 4, 4, r->intra_types_stride, 0, sizeof(intra_types[0])); + return 0; + } + r->chroma_vlc = 1; + r->luma_vlc = 0; + if(IS_INTRA(s->current_picture_ptr->f.mb_type[mb_pos])){ if(r->is16){ t = get_bits(gb, 2); @@ -956,15 +982,6 @@ static void rv34_pred_4x4_block(RV34DecContext *r, uint8_t *dst, int stride, int r->h.pred4x4[itype](dst, prev, stride); } -/** add_pixels_clamped for 4x4 block */ -static void rv34_add_4x4_block(uint8_t *dst, int stride, DCTELEM block[64], int off) -{ - int x, y; - for(y = 0; y < 4; y++) - for(x = 0; x < 4; x++) - dst[x + y*stride] = av_clip_uint8(dst[x + y*stride] + block[off + x+y*8]); -} - static inline int adjust_pred16(int itype, int up, int left) { if(!up && !left) @@ -981,15 +998,35 @@ static inline int adjust_pred16(int itype, int up, int left) return itype; } -static void rv34_output_macroblock(RV34DecContext *r, int8_t *intra_types, int cbp, int is16) +static inline void rv34_process_block(RV34DecContext *r, + uint8_t *pdst, int stride, + int fc, int sc, int q_dc, int q_ac) { MpegEncContext *s = &r->s; - DSPContext *dsp = &s->dsp; - int i, j; - uint8_t *Y, *U, *V; - int itype; - int avail[6*8] = {0}; - int idx; + DCTELEM *ptr = s->block[0]; + int has_ac = rv34_decode_block(ptr, &s->gb, r->cur_vlcs, + fc, sc, q_dc, q_ac, q_ac); + if(has_ac){ + r->rdsp.rv34_idct_add(pdst, stride, ptr); + }else{ + r->rdsp.rv34_idct_dc_add(pdst, stride, ptr[0]); + ptr[0] = 0; + } +} + +static void rv34_output_i16x16(RV34DecContext *r, int8_t *intra_types, int cbp) +{ + LOCAL_ALIGNED_16(DCTELEM, block16, [16]); + MpegEncContext *s = &r->s; + GetBitContext *gb = &s->gb; + int q_dc = rv34_qscale_tab[ r->luma_dc_quant_i[s->qscale] ], + q_ac = rv34_qscale_tab[s->qscale]; + uint8_t *dst = s->dest[0]; + DCTELEM *ptr = s->block[0]; + int avail[6*8] = {0}; + int i, j, itype, has_ac; + + memset(block16, 0, 16 * sizeof(*block16)); // Set neighbour information. if(r->avail_cache[1]) @@ -1005,80 +1042,118 @@ static void rv34_output_macroblock(RV34DecContext *r, int8_t *intra_types, int c if(r->avail_cache[9]) avail[24] = avail[32] = 1; - Y = s->dest[0]; - U = s->dest[1]; - V = s->dest[2]; - if(!is16){ - for(j = 0; j < 4; j++){ - idx = 9 + j*8; - for(i = 0; i < 4; i++, cbp >>= 1, Y += 4, idx++){ - rv34_pred_4x4_block(r, Y, s->linesize, ittrans[intra_types[i]], avail[idx-8], avail[idx-1], avail[idx+7], avail[idx-7]); - avail[idx] = 1; - if(cbp & 1) - rv34_add_4x4_block(Y, s->linesize, s->block[(i>>1)+(j&2)], (i&1)*4+(j&1)*32); - } - Y += s->linesize * 4 - 4*4; - intra_types += r->intra_types_stride; + has_ac = rv34_decode_block(block16, gb, r->cur_vlcs, 3, 0, q_dc, q_dc, q_ac); + if(has_ac) + r->rdsp.rv34_inv_transform(block16); + else + r->rdsp.rv34_inv_transform_dc(block16); + + itype = ittrans16[intra_types[0]]; + itype = adjust_pred16(itype, r->avail_cache[6-4], r->avail_cache[6-1]); + r->h.pred16x16[itype](dst, s->linesize); + + for(j = 0; j < 4; j++){ + for(i = 0; i < 4; i++, cbp >>= 1){ + int dc = block16[i + j*4]; + + if(cbp & 1){ + has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); + }else + has_ac = 0; + + if(has_ac){ + ptr[0] = dc; + r->rdsp.rv34_idct_add(dst+4*i, s->linesize, ptr); + }else + r->rdsp.rv34_idct_dc_add(dst+4*i, s->linesize, dc); } - intra_types -= r->intra_types_stride * 4; - fill_rectangle(r->avail_cache + 6, 2, 2, 4, 0, 4); - for(j = 0; j < 2; j++){ - idx = 6 + j*4; - for(i = 0; i < 2; i++, cbp >>= 1, idx++){ - rv34_pred_4x4_block(r, U + i*4 + j*4*s->uvlinesize, s->uvlinesize, ittrans[intra_types[i*2+j*2*r->intra_types_stride]], r->avail_cache[idx-4], r->avail_cache[idx-1], !i && !j, r->avail_cache[idx-3]); - rv34_pred_4x4_block(r, V + i*4 + j*4*s->uvlinesize, s->uvlinesize, ittrans[intra_types[i*2+j*2*r->intra_types_stride]], r->avail_cache[idx-4], r->avail_cache[idx-1], !i && !j, r->avail_cache[idx-3]); - r->avail_cache[idx] = 1; - if(cbp & 0x01) - rv34_add_4x4_block(U + i*4 + j*4*s->uvlinesize, s->uvlinesize, s->block[4], i*4+j*32); - if(cbp & 0x10) - rv34_add_4x4_block(V + i*4 + j*4*s->uvlinesize, s->uvlinesize, s->block[5], i*4+j*32); - } + + dst += 4*s->linesize; + } + + itype = ittrans16[intra_types[0]]; + if(itype == PLANE_PRED8x8) itype = DC_PRED8x8; + itype = adjust_pred16(itype, r->avail_cache[6-4], r->avail_cache[6-1]); + + q_dc = rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]]; + q_ac = rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]]; + + for(j = 1; j < 3; j++){ + dst = s->dest[j]; + r->h.pred8x8[itype](dst, s->uvlinesize); + for(i = 0; i < 4; i++, cbp >>= 1){ + uint8_t *pdst; + if(!(cbp & 1)) continue; + pdst = dst + (i&1)*4 + (i&2)*2*s->uvlinesize; + + rv34_process_block(r, pdst, s->uvlinesize, + r->chroma_vlc, 1, q_dc, q_ac); } - }else{ - itype = ittrans16[intra_types[0]]; - itype = adjust_pred16(itype, r->avail_cache[6-4], r->avail_cache[6-1]); - r->h.pred16x16[itype](Y, s->linesize); - dsp->add_pixels_clamped(s->block[0], Y, s->linesize); - dsp->add_pixels_clamped(s->block[1], Y + 8, s->linesize); - Y += s->linesize * 8; - dsp->add_pixels_clamped(s->block[2], Y, s->linesize); - dsp->add_pixels_clamped(s->block[3], Y + 8, s->linesize); - - itype = ittrans16[intra_types[0]]; - if(itype == PLANE_PRED8x8) itype = DC_PRED8x8; - itype = adjust_pred16(itype, r->avail_cache[6-4], r->avail_cache[6-1]); - r->h.pred8x8[itype](U, s->uvlinesize); - dsp->add_pixels_clamped(s->block[4], U, s->uvlinesize); - r->h.pred8x8[itype](V, s->uvlinesize); - dsp->add_pixels_clamped(s->block[5], V, s->uvlinesize); } } -/** - * mask for retrieving all bits in coded block pattern - * corresponding to one 8x8 block - */ -#define LUMA_CBP_BLOCK_MASK 0x33 +static void rv34_output_intra(RV34DecContext *r, int8_t *intra_types, int cbp) +{ + MpegEncContext *s = &r->s; + uint8_t *dst = s->dest[0]; + int avail[6*8] = {0}; + int i, j, k; + int idx, q_ac, q_dc; -#define U_CBP_MASK 0x0F0000 -#define V_CBP_MASK 0xF00000 + // Set neighbour information. + if(r->avail_cache[1]) + avail[0] = 1; + if(r->avail_cache[2]) + avail[1] = avail[2] = 1; + if(r->avail_cache[3]) + avail[3] = avail[4] = 1; + if(r->avail_cache[4]) + avail[5] = 1; + if(r->avail_cache[5]) + avail[8] = avail[16] = 1; + if(r->avail_cache[9]) + avail[24] = avail[32] = 1; -/** @} */ // recons group + q_ac = rv34_qscale_tab[s->qscale]; + for(j = 0; j < 4; j++){ + idx = 9 + j*8; + for(i = 0; i < 4; i++, cbp >>= 1, dst += 4, idx++){ + rv34_pred_4x4_block(r, dst, s->linesize, ittrans[intra_types[i]], avail[idx-8], avail[idx-1], avail[idx+7], avail[idx-7]); + avail[idx] = 1; + if(!(cbp & 1)) continue; + + rv34_process_block(r, dst, s->linesize, + r->luma_vlc, 0, q_ac, q_ac); + } + dst += s->linesize * 4 - 4*4; + intra_types += r->intra_types_stride; + } + intra_types -= r->intra_types_stride * 4; -static void rv34_apply_differences(RV34DecContext *r, int cbp) -{ - static const int shifts[4] = { 0, 2, 8, 10 }; - MpegEncContext *s = &r->s; - int i; + q_dc = rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]]; + q_ac = rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]]; + + for(k = 0; k < 2; k++){ + dst = s->dest[1+k]; + fill_rectangle(r->avail_cache + 6, 2, 2, 4, 0, 4); + + for(j = 0; j < 2; j++){ + int* acache = r->avail_cache + 6 + j*4; + for(i = 0; i < 2; i++, cbp >>= 1, acache++){ + int itype = ittrans[intra_types[i*2+j*2*r->intra_types_stride]]; + rv34_pred_4x4_block(r, dst+4*i, s->uvlinesize, itype, acache[-4], acache[-1], !i && !j, acache[-3]); + acache[0] = 1; - for(i = 0; i < 4; i++) - if((cbp & (LUMA_CBP_BLOCK_MASK << shifts[i])) || r->block_type == RV34_MB_P_MIX16x16) - s->dsp.add_pixels_clamped(s->block[i], s->dest[0] + (i & 1)*8 + (i&2)*4*s->linesize, s->linesize); - if(cbp & U_CBP_MASK) - s->dsp.add_pixels_clamped(s->block[4], s->dest[1], s->uvlinesize); - if(cbp & V_CBP_MASK) - s->dsp.add_pixels_clamped(s->block[5], s->dest[2], s->uvlinesize); + if(!(cbp&1)) continue; + + rv34_process_block(r, dst + 4*i, s->uvlinesize, + r->chroma_vlc, 1, q_dc, q_ac); + } + + dst += 4*s->uvlinesize; + } + } } static int is_mv_diff_gt_3(int16_t (*motion_val)[2], int step) @@ -1123,17 +1198,17 @@ static int rv34_set_deblock_coef(RV34DecContext *r) return hmvmask | vmvmask; } -static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types) +static int rv34_decode_inter_macroblock(RV34DecContext *r, int8_t *intra_types) { - MpegEncContext *s = &r->s; - GetBitContext *gb = &s->gb; + MpegEncContext *s = &r->s; + GetBitContext *gb = &s->gb; + uint8_t *dst = s->dest[0]; + DCTELEM *ptr = s->block[0]; + int mb_pos = s->mb_x + s->mb_y * s->mb_stride; int cbp, cbp2; int q_dc, q_ac, has_ac; - int i, blknum, blkoff; - LOCAL_ALIGNED_16(DCTELEM, block16, [64]); - int luma_dc_quant; + int i, j; int dist; - int mb_pos = s->mb_x + s->mb_y * s->mb_stride; // Calculate which neighbours are available. Maybe it's worth optimizing too. memset(r->avail_cache, 0, sizeof(r->avail_cache)); @@ -1151,70 +1226,126 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types) r->avail_cache[1] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride - 1]; s->qscale = r->si.quant; - cbp = cbp2 = rv34_decode_mb_header(r, intra_types); + cbp = cbp2 = rv34_decode_inter_mb_header(r, intra_types); r->cbp_luma [mb_pos] = cbp; r->cbp_chroma[mb_pos] = cbp >> 16; - if(s->pict_type == AV_PICTURE_TYPE_I) - r->deblock_coefs[mb_pos] = 0xFFFF; - else - r->deblock_coefs[mb_pos] = rv34_set_deblock_coef(r) | r->cbp_luma[mb_pos]; + r->deblock_coefs[mb_pos] = rv34_set_deblock_coef(r) | r->cbp_luma[mb_pos]; s->current_picture_ptr->f.qscale_table[mb_pos] = s->qscale; if(cbp == -1) return -1; - luma_dc_quant = r->block_type == RV34_MB_P_MIX16x16 ? r->luma_dc_quant_p[s->qscale] : r->luma_dc_quant_i[s->qscale]; + if (IS_INTRA(s->current_picture_ptr->f.mb_type[mb_pos])){ + if(r->is16) rv34_output_i16x16(r, intra_types, cbp); + else rv34_output_intra(r, intra_types, cbp); + return 0; + } + if(r->is16){ - q_dc = rv34_qscale_tab[luma_dc_quant]; + // Only for RV34_MB_P_MIX16x16 + LOCAL_ALIGNED_16(DCTELEM, block16, [16]); + memset(block16, 0, 16 * sizeof(*block16)); + q_dc = rv34_qscale_tab[ r->luma_dc_quant_p[s->qscale] ]; q_ac = rv34_qscale_tab[s->qscale]; - s->dsp.clear_block(block16); if (rv34_decode_block(block16, gb, r->cur_vlcs, 3, 0, q_dc, q_dc, q_ac)) - r->rdsp.rv34_inv_transform_tab[1](block16); + r->rdsp.rv34_inv_transform(block16); else - r->rdsp.rv34_inv_transform_dc_tab[1](block16); - } + r->rdsp.rv34_inv_transform_dc(block16); + + q_ac = rv34_qscale_tab[s->qscale]; + + for(j = 0; j < 4; j++){ + for(i = 0; i < 4; i++, cbp >>= 1){ + int dc = block16[i + j*4]; + + if(cbp & 1){ + has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); + }else + has_ac = 0; + + if(has_ac){ + ptr[0] = dc; + r->rdsp.rv34_idct_add(dst+4*i, s->linesize, ptr); + }else + r->rdsp.rv34_idct_dc_add(dst+4*i, s->linesize, dc); + } + + dst += 4*s->linesize; + } - q_ac = rv34_qscale_tab[s->qscale]; - for(i = 0; i < 16; i++, cbp >>= 1){ - DCTELEM *ptr; - if(!r->is16 && !(cbp & 1)) continue; - blknum = ((i & 2) >> 1) + ((i & 8) >> 2); - blkoff = ((i & 1) << 2) + ((i & 4) << 3); - ptr = s->block[blknum] + blkoff; - if(cbp & 1) - has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); - else - has_ac = 0; - if(r->is16) //FIXME: optimize - ptr[0] = block16[(i & 3) | ((i & 0xC) << 1)]; - if(has_ac) - r->rdsp.rv34_inv_transform_tab[0](ptr); - else - r->rdsp.rv34_inv_transform_dc_tab[0](ptr); - } - if(r->block_type == RV34_MB_P_MIX16x16) r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 1); + }else{ + q_ac = rv34_qscale_tab[s->qscale]; + + for(j = 0; j < 4; j++){ + for(i = 0; i < 4; i++, cbp >>= 1){ + if(!(cbp & 1)) continue; + + rv34_process_block(r, dst + 4*i, s->linesize, + r->luma_vlc, 0, q_ac, q_ac); + } + dst += 4*s->linesize; + } + } + q_dc = rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]]; q_ac = rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]]; - for(; i < 24; i++, cbp >>= 1){ - DCTELEM *ptr; - if(!(cbp & 1)) continue; - blknum = ((i & 4) >> 2) + 4; - blkoff = ((i & 1) << 2) + ((i & 2) << 4); - ptr = s->block[blknum] + blkoff; - if (rv34_decode_block(ptr, gb, r->cur_vlcs, r->chroma_vlc, 1, q_dc, q_ac, q_ac)) - r->rdsp.rv34_inv_transform_tab[0](ptr); - else - r->rdsp.rv34_inv_transform_dc_tab[0](ptr); + + for(j = 1; j < 3; j++){ + dst = s->dest[j]; + for(i = 0; i < 4; i++, cbp >>= 1){ + uint8_t *pdst; + if(!(cbp & 1)) continue; + pdst = dst + (i&1)*4 + (i&2)*2*s->uvlinesize; + + rv34_process_block(r, pdst, s->uvlinesize, + r->chroma_vlc, 1, q_dc, q_ac); + } } - if (IS_INTRA(s->current_picture_ptr->f.mb_type[mb_pos])) - rv34_output_macroblock(r, intra_types, cbp2, r->is16); - else - rv34_apply_differences(r, cbp2); return 0; } +static int rv34_decode_intra_macroblock(RV34DecContext *r, int8_t *intra_types) +{ + MpegEncContext *s = &r->s; + int cbp, dist; + int mb_pos = s->mb_x + s->mb_y * s->mb_stride; + + // Calculate which neighbours are available. Maybe it's worth optimizing too. + memset(r->avail_cache, 0, sizeof(r->avail_cache)); + fill_rectangle(r->avail_cache + 6, 2, 2, 4, 1, 4); + dist = (s->mb_x - s->resync_mb_x) + (s->mb_y - s->resync_mb_y) * s->mb_width; + if(s->mb_x && dist) + r->avail_cache[5] = + r->avail_cache[9] = s->current_picture_ptr->f.mb_type[mb_pos - 1]; + if(dist >= s->mb_width) + r->avail_cache[2] = + r->avail_cache[3] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride]; + if(((s->mb_x+1) < s->mb_width) && dist >= s->mb_width - 1) + r->avail_cache[4] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride + 1]; + if(s->mb_x && dist > s->mb_width) + r->avail_cache[1] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride - 1]; + + s->qscale = r->si.quant; + cbp = rv34_decode_intra_mb_header(r, intra_types); + r->cbp_luma [mb_pos] = cbp; + r->cbp_chroma[mb_pos] = cbp >> 16; + r->deblock_coefs[mb_pos] = 0xFFFF; + s->current_picture_ptr->f.qscale_table[mb_pos] = s->qscale; + + if(cbp == -1) + return -1; + + if(r->is16){ + rv34_output_i16x16(r, intra_types, cbp); + return 0; + } + + rv34_output_intra(r, intra_types, cbp); + return 0; +} + static int check_slice_end(RV34DecContext *r, MpegEncContext *s) { int bits; @@ -1326,9 +1457,12 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int ff_init_block_index(s); while(!check_slice_end(r, s)) { ff_update_block_index(s); - s->dsp.clear_blocks(s->block[0]); - if(rv34_decode_macroblock(r, r->intra_types + s->mb_x * 4 + 4) < 0){ + if(r->si.type) + res = rv34_decode_inter_macroblock(r, r->intra_types + s->mb_x * 4 + 4); + else + res = rv34_decode_intra_macroblock(r, r->intra_types + s->mb_x * 4 + 4); + if(res < 0){ ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_ERROR); return -1; } diff --git a/libavcodec/rv34dsp.c b/libavcodec/rv34dsp.c index 1767be4173..e2251773af 100644 --- a/libavcodec/rv34dsp.c +++ b/libavcodec/rv34dsp.c @@ -37,10 +37,10 @@ static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block) int i; for(i = 0; i < 4; i++){ - const int z0 = 13*(block[i+8*0] + block[i+8*2]); - const int z1 = 13*(block[i+8*0] - block[i+8*2]); - const int z2 = 7* block[i+8*1] - 17*block[i+8*3]; - const int z3 = 17* block[i+8*1] + 7*block[i+8*3]; + const int z0 = 13*(block[i+4*0] + block[i+4*2]); + const int z1 = 13*(block[i+4*0] - block[i+4*2]); + const int z2 = 7* block[i+4*1] - 17*block[i+4*3]; + const int z3 = 17* block[i+4*1] + 7*block[i+4*3]; temp[4*i+0] = z0 + z3; temp[4*i+1] = z1 + z2; @@ -50,14 +50,16 @@ static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block) } /** - * Real Video 3.0/4.0 inverse transform + * Real Video 3.0/4.0 inverse transform + sample reconstruction * Code is almost the same as in SVQ3, only scaling is different. */ -static void rv34_inv_transform_c(DCTELEM *block){ - int temp[16]; - int i; +static void rv34_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){ + int temp[16]; + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; + int i; rv34_row_transform(temp, block); + memset(block, 0, 16*sizeof(DCTELEM)); for(i = 0; i < 4; i++){ const int z0 = 13*(temp[4*0+i] + temp[4*2+i]) + 0x200; @@ -65,10 +67,12 @@ static void rv34_inv_transform_c(DCTELEM *block){ const int z2 = 7* temp[4*1+i] - 17*temp[4*3+i]; const int z3 = 17* temp[4*1+i] + 7*temp[4*3+i]; - block[i*8+0] = (z0 + z3) >> 10; - block[i*8+1] = (z1 + z2) >> 10; - block[i*8+2] = (z1 - z2) >> 10; - block[i*8+3] = (z0 - z3) >> 10; + dst[0] = cm[ dst[0] + ( (z0 + z3) >> 10 ) ]; + dst[1] = cm[ dst[1] + ( (z1 + z2) >> 10 ) ]; + dst[2] = cm[ dst[2] + ( (z1 - z2) >> 10 ) ]; + dst[3] = cm[ dst[3] + ( (z0 - z3) >> 10 ) ]; + + dst += stride; } } @@ -90,21 +94,27 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){ const int z2 = 7* temp[4*1+i] - 17*temp[4*3+i]; const int z3 = 17* temp[4*1+i] + 7*temp[4*3+i]; - block[i*8+0] = ((z0 + z3) * 3) >> 11; - block[i*8+1] = ((z1 + z2) * 3) >> 11; - block[i*8+2] = ((z1 - z2) * 3) >> 11; - block[i*8+3] = ((z0 - z3) * 3) >> 11; + block[i*4+0] = ((z0 + z3) * 3) >> 11; + block[i*4+1] = ((z1 + z2) * 3) >> 11; + block[i*4+2] = ((z1 - z2) * 3) >> 11; + block[i*4+3] = ((z0 - z3) * 3) >> 11; } } -static void rv34_inv_transform_dc_c(DCTELEM *block) +static void rv34_idct_dc_add_c(uint8_t *dst, int stride, int dc) { - DCTELEM dc = (13 * 13 * block[0] + 0x200) >> 10; + const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i, j; - for (i = 0; i < 4; i++, block += 8) + cm += (13*13*dc + 0x200) >> 10; + + for (i = 0; i < 4; i++) + { for (j = 0; j < 4; j++) - block[j] = dc; + dst[j] = cm[ dst[j] ]; + + dst += stride; + } } static void rv34_inv_transform_dc_noround_c(DCTELEM *block) @@ -112,7 +122,7 @@ static void rv34_inv_transform_dc_noround_c(DCTELEM *block) DCTELEM dc = (13 * 13 * 3 * block[0]) >> 11; int i, j; - for (i = 0; i < 4; i++, block += 8) + for (i = 0; i < 4; i++, block += 4) for (j = 0; j < 4; j++) block[j] = dc; } @@ -121,10 +131,11 @@ static void rv34_inv_transform_dc_noround_c(DCTELEM *block) av_cold void ff_rv34dsp_init(RV34DSPContext *c, DSPContext* dsp) { - c->rv34_inv_transform_tab[0] = rv34_inv_transform_c; - c->rv34_inv_transform_tab[1] = rv34_inv_transform_noround_c; - c->rv34_inv_transform_dc_tab[0] = rv34_inv_transform_dc_c; - c->rv34_inv_transform_dc_tab[1] = rv34_inv_transform_dc_noround_c; + c->rv34_inv_transform = rv34_inv_transform_noround_c; + c->rv34_inv_transform_dc = rv34_inv_transform_dc_noround_c; + + c->rv34_idct_add = rv34_idct_add_c; + c->rv34_idct_dc_add = rv34_idct_dc_add_c; if (HAVE_NEON) ff_rv34dsp_init_neon(c, dsp); diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h index 6f53a09928..fe8fcaa8dd 100644 --- a/libavcodec/rv34dsp.h +++ b/libavcodec/rv34dsp.h @@ -36,6 +36,10 @@ typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/, typedef void (*rv34_inv_transform_func)(DCTELEM *block); +typedef void (*rv34_idct_add_func)(uint8_t *dst, int stride, DCTELEM *block); +typedef void (*rv34_idct_dc_add_func)(uint8_t *dst, int stride, + int dc); + typedef void (*rv40_weak_loop_filter_func)(uint8_t *src, int stride, int filter_p1, int filter_q1, int alpha, int beta, @@ -55,8 +59,10 @@ typedef struct RV34DSPContext { h264_chroma_mc_func put_chroma_pixels_tab[3]; h264_chroma_mc_func avg_chroma_pixels_tab[3]; rv40_weight_func rv40_weight_pixels_tab[2]; - rv34_inv_transform_func rv34_inv_transform_tab[2]; - void (*rv34_inv_transform_dc_tab[2])(DCTELEM *block); + rv34_inv_transform_func rv34_inv_transform; + rv34_inv_transform_func rv34_inv_transform_dc; + rv34_idct_add_func rv34_idct_add; + rv34_idct_dc_add_func rv34_idct_dc_add; rv40_weak_loop_filter_func rv40_weak_loop_filter[2]; rv40_strong_loop_filter_func rv40_strong_loop_filter[2]; rv40_loop_filter_strength_func rv40_loop_filter_strength[2]; diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 462288446b..7ea9c54f31 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -25,6 +25,7 @@ * utils. */ +#include "libavutil/avassert.h" #include "libavutil/avstring.h" #include "libavutil/crc.h" #include "libavutil/mathematics.h" @@ -102,6 +103,16 @@ void avcodec_init(void) dsputil_static_init(); } +static av_always_inline int codec_is_encoder(AVCodec *codec) +{ + return codec && (codec->encode || codec->encode2); +} + +static av_always_inline int codec_is_decoder(AVCodec *codec) +{ + return codec && codec->decode; +} + void avcodec_register(AVCodec *codec) { AVCodec **p; @@ -260,11 +271,47 @@ void ff_init_buffer_info(AVCodecContext *s, AVFrame *pic) pic->format = s->pix_fmt; } +int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels, + enum AVSampleFormat sample_fmt, const uint8_t *buf, + int buf_size, int align) +{ + int ch, planar, needed_size, ret = 0; + + needed_size = av_samples_get_buffer_size(NULL, nb_channels, + frame->nb_samples, sample_fmt, + align); + if (buf_size < needed_size) + return AVERROR(EINVAL); + + planar = av_sample_fmt_is_planar(sample_fmt); + if (planar && nb_channels > AV_NUM_DATA_POINTERS) { + if (!(frame->extended_data = av_mallocz(nb_channels * + sizeof(*frame->extended_data)))) + return AVERROR(ENOMEM); + } else { + frame->extended_data = frame->data; + } + + if ((ret = av_samples_fill_arrays(frame->extended_data, &frame->linesize[0], + buf, nb_channels, frame->nb_samples, + sample_fmt, align)) < 0) { + if (frame->extended_data != frame->data) + av_free(frame->extended_data); + return ret; + } + if (frame->extended_data != frame->data) { + for (ch = 0; ch < AV_NUM_DATA_POINTERS; ch++) + frame->data[ch] = frame->extended_data[ch]; + } + + return ret; +} + static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame) { AVCodecInternal *avci = avctx->internal; InternalBuffer *buf; - int buf_size, ret, i, needs_extended_data; + int buf_size, ret; buf_size = av_samples_get_buffer_size(NULL, avctx->channels, frame->nb_samples, avctx->sample_fmt, @@ -272,9 +319,6 @@ static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame) if (buf_size < 0) return AVERROR(EINVAL); - needs_extended_data = av_sample_fmt_is_planar(avctx->sample_fmt) && - avctx->channels > AV_NUM_DATA_POINTERS; - /* allocate InternalBuffer if needed */ if (!avci->buffer) { avci->buffer = av_mallocz(sizeof(InternalBuffer)); @@ -306,48 +350,31 @@ static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame) /* if there is no previous buffer or the previous buffer cannot be used as-is, allocate a new buffer and/or rearrange the channel pointers */ if (!buf->extended_data) { - /* if the channel pointers will fit, just set extended_data to data, - otherwise allocate the extended_data channel pointers */ - if (needs_extended_data) { - buf->extended_data = av_mallocz(avctx->channels * - sizeof(*buf->extended_data)); - if (!buf->extended_data) + if (!buf->data[0]) { + if (!(buf->data[0] = av_mallocz(buf_size))) return AVERROR(ENOMEM); - } else { - buf->extended_data = buf->data; - } - - /* if there is a previous buffer and it is large enough, reuse it and - just fill-in new channel pointers and linesize, otherwise allocate - a new buffer */ - if (buf->extended_data[0]) { - ret = av_samples_fill_arrays(buf->extended_data, &buf->linesize[0], - buf->extended_data[0], avctx->channels, - frame->nb_samples, avctx->sample_fmt, - 32); - } else { - ret = av_samples_alloc(buf->extended_data, &buf->linesize[0], - avctx->channels, frame->nb_samples, - avctx->sample_fmt, 32); + buf->audio_data_size = buf_size; } - if (ret) + if ((ret = avcodec_fill_audio_frame(frame, avctx->channels, + avctx->sample_fmt, buf->data[0], + buf->audio_data_size, 32))) return ret; - /* if data was not used for extended_data, we need to copy as many of - the extended_data channel pointers as will fit */ - if (needs_extended_data) { - for (i = 0; i < AV_NUM_DATA_POINTERS; i++) - buf->data[i] = buf->extended_data[i]; - } - buf->audio_data_size = buf_size; - buf->nb_channels = avctx->channels; + if (frame->extended_data == frame->data) + buf->extended_data = buf->data; + else + buf->extended_data = frame->extended_data; + memcpy(buf->data, frame->data, sizeof(frame->data)); + buf->linesize[0] = frame->linesize[0]; + buf->nb_channels = avctx->channels; + } else { + /* copy InternalBuffer info to the AVFrame */ + frame->extended_data = buf->extended_data; + frame->linesize[0] = buf->linesize[0]; + memcpy(frame->data, buf->data, sizeof(frame->data)); } - /* copy InternalBuffer info to the AVFrame */ frame->type = FF_BUFFER_TYPE_INTERNAL; - frame->extended_data = buf->extended_data; - frame->linesize[0] = buf->linesize[0]; - memcpy(frame->data, buf->data, sizeof(frame->data)); if (avctx->pkt) { frame->pkt_pts = avctx->pkt->pts; @@ -732,7 +759,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD /* if the decoder init function was already called previously, free the already allocated subtitle_header before overwriting it */ - if (codec->decode) + if (codec_is_decoder(codec)) av_freep(&avctx->subtitle_header); #define SANE_NB_CHANNELS 128U @@ -789,7 +816,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD ret = AVERROR(EINVAL); goto free_and_end; } - if (avctx->codec->encode) { + if (codec_is_encoder(avctx->codec)) { int i; if (avctx->codec->sample_fmts) { for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++) @@ -870,21 +897,225 @@ free_and_end: goto end; } -int attribute_align_arg avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size, - const short *samples) +int ff_alloc_packet(AVPacket *avpkt, int size) { - if(buf_size < FF_MIN_BUFFER_SIZE && 0){ - av_log(avctx, AV_LOG_ERROR, "buffer smaller than minimum size\n"); - return -1; + if (size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE) + return AVERROR(EINVAL); + + if (avpkt->data) { + uint8_t *pkt_data; + int pkt_size; + + if (avpkt->size < size) + return AVERROR(EINVAL); + + pkt_data = avpkt->data; + pkt_size = avpkt->size; + av_init_packet(avpkt); + avpkt->data = pkt_data; + avpkt->size = pkt_size; + return 0; + } else { + return av_new_packet(avpkt, size); } - if((avctx->codec->capabilities & CODEC_CAP_DELAY) || samples){ - int ret = avctx->codec->encode(avctx, buf, buf_size, samples); - avctx->frame_number++; - return ret; - }else +} + +int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx, + AVPacket *avpkt, + const AVFrame *frame, + int *got_packet_ptr) +{ + int ret; + int user_packet = !!avpkt->data; + int nb_samples; + + if (!(avctx->codec->capabilities & CODEC_CAP_DELAY) && !frame) { + av_init_packet(avpkt); + avpkt->size = 0; return 0; + } + + /* check for valid frame size */ + if (frame) { + nb_samples = frame->nb_samples; + if (avctx->codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME) { + if (nb_samples > avctx->frame_size) + return AVERROR(EINVAL); + } else if (!(avctx->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)) { + if (nb_samples != avctx->frame_size) + return AVERROR(EINVAL); + } + } else { + nb_samples = avctx->frame_size; + } + + if (avctx->codec->encode2) { + *got_packet_ptr = 0; + ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr); + if (!ret && *got_packet_ptr && + !(avctx->codec->capabilities & CODEC_CAP_DELAY)) { + avpkt->pts = frame->pts; + avpkt->duration = av_rescale_q(frame->nb_samples, + (AVRational){ 1, avctx->sample_rate }, + avctx->time_base); + } + } else { + /* for compatibility with encoders not supporting encode2(), we need to + allocate a packet buffer if the user has not provided one or check + the size otherwise */ + int fs_tmp = 0; + int buf_size = avpkt->size; + if (!user_packet) { + if (avctx->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE) { + av_assert0(av_get_bits_per_sample(avctx->codec_id) != 0); + buf_size = nb_samples * avctx->channels * + av_get_bits_per_sample(avctx->codec_id) / 8; + } else { + /* this is a guess as to the required size. + if an encoder needs more than this, it should probably + implement encode2() */ + buf_size = 2 * avctx->frame_size * avctx->channels * + av_get_bytes_per_sample(avctx->sample_fmt); + buf_size += FF_MIN_BUFFER_SIZE; + } + } + if ((ret = ff_alloc_packet(avpkt, buf_size))) + return ret; + + /* Encoders using AVCodec.encode() that support + CODEC_CAP_SMALL_LAST_FRAME require avctx->frame_size to be set to + the smaller size when encoding the last frame. + This code can be removed once all encoders supporting + CODEC_CAP_SMALL_LAST_FRAME use encode2() */ + if ((avctx->codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME) && + nb_samples < avctx->frame_size) { + fs_tmp = avctx->frame_size; + avctx->frame_size = nb_samples; + } + + /* encode the frame */ + ret = avctx->codec->encode(avctx, avpkt->data, avpkt->size, + frame ? frame->data[0] : NULL); + if (ret >= 0) { + if (!ret) { + /* no output. if the packet data was allocated by libavcodec, + free it */ + if (!user_packet) + av_freep(&avpkt->data); + } else { + if (avctx->coded_frame) + avpkt->pts = avctx->coded_frame->pts; + /* Set duration for final small packet. This can be removed + once all encoders supporting CODEC_CAP_SMALL_LAST_FRAME use + encode2() */ + if (fs_tmp) { + avpkt->duration = av_rescale_q(avctx->frame_size, + (AVRational){ 1, avctx->sample_rate }, + avctx->time_base); + } + } + avpkt->size = ret; + *got_packet_ptr = (ret > 0); + ret = 0; + } + + if (fs_tmp) + avctx->frame_size = fs_tmp; + } + if (!ret) + avctx->frame_number++; + + /* NOTE: if we add any audio encoders which output non-keyframe packets, + this needs to be moved to the encoders, but for now we can do it + here to simplify things */ + avpkt->flags |= AV_PKT_FLAG_KEY; + + return ret; } +#if FF_API_OLD_DECODE_AUDIO +int attribute_align_arg avcodec_encode_audio(AVCodecContext *avctx, + uint8_t *buf, int buf_size, + const short *samples) +{ + AVPacket pkt; + AVFrame frame0; + AVFrame *frame; + int ret, samples_size, got_packet; + + av_init_packet(&pkt); + pkt.data = buf; + pkt.size = buf_size; + + if (samples) { + frame = &frame0; + avcodec_get_frame_defaults(frame); + + if (avctx->frame_size) { + frame->nb_samples = avctx->frame_size; + } else { + /* if frame_size is not set, the number of samples must be + calculated from the buffer size */ + int64_t nb_samples; + if (!av_get_bits_per_sample(avctx->codec_id)) { + av_log(avctx, AV_LOG_ERROR, "avcodec_encode_audio() does not " + "support this codec\n"); + return AVERROR(EINVAL); + } + nb_samples = (int64_t)buf_size * 8 / + (av_get_bits_per_sample(avctx->codec_id) * + avctx->channels); + if (nb_samples >= INT_MAX) + return AVERROR(EINVAL); + frame->nb_samples = nb_samples; + } + + /* it is assumed that the samples buffer is large enough based on the + relevant parameters */ + samples_size = av_samples_get_buffer_size(NULL, avctx->channels, + frame->nb_samples, + avctx->sample_fmt, 1); + if ((ret = avcodec_fill_audio_frame(frame, avctx->channels, + avctx->sample_fmt, + samples, samples_size, 1))) + return ret; + + /* fabricate frame pts from sample count. + this is needed because the avcodec_encode_audio() API does not have + a way for the user to provide pts */ + if(avctx->sample_rate && avctx->time_base.num) + frame->pts = av_rescale_q(avctx->internal->sample_count, + (AVRational){ 1, avctx->sample_rate }, + avctx->time_base); + else + frame->pts = AV_NOPTS_VALUE; + avctx->internal->sample_count += frame->nb_samples; + } else { + frame = NULL; + } + + got_packet = 0; + ret = avcodec_encode_audio2(avctx, &pkt, frame, &got_packet); + if (!ret && got_packet && avctx->coded_frame) { + avctx->coded_frame->pts = pkt.pts; + avctx->coded_frame->key_frame = !!(pkt.flags & AV_PKT_FLAG_KEY); + } + /* free any side data since we cannot return it */ + if (pkt.side_data_elems > 0) { + int i; + for (i = 0; i < pkt.side_data_elems; i++) + av_free(pkt.side_data[i].data); + av_freep(&pkt.side_data); + pkt.side_data_elems = 0; + } + + if (frame && frame->extended_data != frame->data) + av_free(frame->extended_data); + + return ret ? ret : pkt.size; +} +#endif + int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size, const AVFrame *pict) { @@ -1187,7 +1418,7 @@ av_cold int avcodec_close(AVCodecContext *avctx) av_opt_free(avctx->priv_data); av_opt_free(avctx); av_freep(&avctx->priv_data); - if(avctx->codec && avctx->codec->encode) + if (codec_is_encoder(avctx->codec)) av_freep(&avctx->extradata); avctx->codec = NULL; avctx->active_thread_type = 0; @@ -1216,7 +1447,7 @@ AVCodec *avcodec_find_encoder(enum CodecID id) p = first_avcodec; id= remap_deprecated_codec_id(id); while (p) { - if (p->encode != NULL && p->id == id) { + if (codec_is_encoder(p) && p->id == id) { if (p->capabilities & CODEC_CAP_EXPERIMENTAL && !experimental) { experimental = p; } else @@ -1234,7 +1465,7 @@ AVCodec *avcodec_find_encoder_by_name(const char *name) return NULL; p = first_avcodec; while (p) { - if (p->encode != NULL && strcmp(name,p->name) == 0) + if (codec_is_encoder(p) && strcmp(name,p->name) == 0) return p; p = p->next; } @@ -1247,7 +1478,7 @@ AVCodec *avcodec_find_decoder(enum CodecID id) p = first_avcodec; id= remap_deprecated_codec_id(id); while (p) { - if (p->decode != NULL && p->id == id) { + if (codec_is_decoder(p) && p->id == id) { if (p->capabilities & CODEC_CAP_EXPERIMENTAL && !experimental) { experimental = p; } else @@ -1265,7 +1496,7 @@ AVCodec *avcodec_find_decoder_by_name(const char *name) return NULL; p = first_avcodec; while (p) { - if (p->decode != NULL && strcmp(name,p->name) == 0) + if (codec_is_decoder(p) && strcmp(name,p->name) == 0) return p; p = p->next; } diff --git a/libavcodec/version.h b/libavcodec/version.h index fd0c3cf1da..f8bb5c69ae 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -21,7 +21,7 @@ #define AVCODEC_VERSION_H #define LIBAVCODEC_VERSION_MAJOR 53 -#define LIBAVCODEC_VERSION_MINOR 55 +#define LIBAVCODEC_VERSION_MINOR 56 #define LIBAVCODEC_VERSION_MICRO 105 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ @@ -123,5 +123,8 @@ #ifndef FF_API_AVFRAME_AGE #define FF_API_AVFRAME_AGE (LIBAVCODEC_VERSION_MAJOR < 54) #endif +#ifndef FF_API_OLD_ENCODE_AUDIO +#define FF_API_OLD_ENCODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 54) +#endif #endif /* AVCODEC_VERSION_H */ diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index a70ad07e87..75bf1ae08a 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -35,21 +35,84 @@ SECTION .text sar %1, 10 %endmacro -%macro rv34_idct_dequant4x4_dc 1 -cglobal rv34_idct_dequant4x4_%1_mmx2, 1, 2, 0 +%macro rv34_idct 1 +cglobal rv34_idct_%1_mmx2, 1, 2, 0 movsx r1, word [r0] IDCT_DC r1 - movd mm0, r1d - pshufw mm0, mm0, 0 - movq [r0+ 0], mm0 - movq [r0+16], mm0 - movq [r0+32], mm0 - movq [r0+48], mm0 + movd m0, r1d + pshufw m0, m0, 0 + movq [r0+ 0], m0 + movq [r0+ 8], m0 + movq [r0+16], m0 + movq [r0+24], m0 REP_RET %endmacro INIT_MMX %define IDCT_DC IDCT_DC_ROUND -rv34_idct_dequant4x4_dc dc +rv34_idct dc %define IDCT_DC IDCT_DC_NOROUND -rv34_idct_dequant4x4_dc dc_noround +rv34_idct dc_noround + +; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc); +cglobal rv34_idct_dc_add_mmx, 3, 3 + ; calculate DC + IDCT_DC_ROUND r2 + pxor m1, m1 + movd m0, r2 + psubw m1, m0 + packuswb m0, m0 + packuswb m1, m1 + punpcklbw m0, m0 + punpcklbw m1, m1 + punpcklwd m0, m0 + punpcklwd m1, m1 + + ; add DC + lea r2, [r0+r1*2] + movh m2, [r0] + movh m3, [r0+r1] + movh m4, [r2] + movh m5, [r2+r1] + paddusb m2, m0 + paddusb m3, m0 + paddusb m4, m0 + paddusb m5, m0 + psubusb m2, m1 + psubusb m3, m1 + psubusb m4, m1 + psubusb m5, m1 + movh [r0], m2 + movh [r0+r1], m3 + movh [r2], m4 + movh [r2+r1], m5 + RET + +; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc); +INIT_XMM +cglobal rv34_idct_dc_add_sse4, 3, 3, 6 + ; load data + IDCT_DC_ROUND r2 + pxor m1, m1 + + ; calculate DC + movd m0, r2 + lea r2, [r0+r1*2] + movd m2, [r0] + movd m3, [r0+r1] + pshuflw m0, m0, 0 + movd m4, [r2] + movd m5, [r2+r1] + punpcklqdq m0, m0 + punpckldq m2, m3 + punpckldq m4, m5 + punpcklbw m2, m1 + punpcklbw m4, m1 + paddw m2, m0 + paddw m4, m0 + packuswb m2, m4 + movd [r0], m2 + pextrd [r0+r1], m2, 1 + pextrd [r2], m2, 2 + pextrd [r2+r1], m2, 3 + RET diff --git a/libavcodec/x86/rv34dsp_init.c b/libavcodec/x86/rv34dsp_init.c index 4317e9b23b..f3d2e172e7 100644 --- a/libavcodec/x86/rv34dsp_init.c +++ b/libavcodec/x86/rv34dsp_init.c @@ -24,17 +24,22 @@ #include "libavcodec/dsputil.h" #include "libavcodec/rv34dsp.h" -void ff_rv34_idct_dequant4x4_dc_mmx2(DCTELEM *block); -void ff_rv34_idct_dequant4x4_dc_noround_mmx2(DCTELEM *block); +void ff_rv34_idct_dc_mmx2(DCTELEM *block); +void ff_rv34_idct_dc_noround_mmx2(DCTELEM *block); +void ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc); +void ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc); av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp) { #if HAVE_YASM int mm_flags = av_get_cpu_flags(); + if (mm_flags & AV_CPU_FLAG_MMX) + c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; if (mm_flags & AV_CPU_FLAG_MMX2) { - c->rv34_inv_transform_dc_tab[0] = ff_rv34_idct_dequant4x4_dc_mmx2; - c->rv34_inv_transform_dc_tab[1] = ff_rv34_idct_dequant4x4_dc_noround_mmx2; + c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2; } + if (mm_flags & AV_CPU_FLAG_SSE4) + c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4; #endif } |