diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-01-15 01:07:00 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-01-15 01:16:54 +0100 |
commit | 4640da7e58509996ff03b1a0b018ca8f337391c7 (patch) | |
tree | 732195f8bc4987e4974df716789044c7e3db0836 | |
parent | a91f2066651416e0f9315e7fb0132587352c75dc (diff) | |
parent | 4cd0bdae9a62d1f0366e60603222762af31e5289 (diff) | |
download | ffmpeg-4640da7e58509996ff03b1a0b018ca8f337391c7.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
sgidec: Use bytestream2 functions to prevent buffer overreads.
cosmetics: Move static and inline attributes to more standard places.
configure: provide libavfilter/version.h header to get_version()
swscale: change yuv2yuvX code to use cpuflag().
libx264: Don't leave max_b_frames as -1 if the user didn't set it
FATE: convert output to rgba for the targa tests which currently output pal8
fate: add missing reference files for targa tests in 9c2f9b0e2
FATE: enable the 2 remaining targa conformance suite tests
targa: add support for rgb555 palette
FATE: fix targa tests on big-endian systems
Conflicts:
libavcodec/sgidec.c
libavcodec/targa.c
libswscale/x86/output.asm
tests/fate/image.mak
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/bytestream.h | 36 | ||||
-rw-r--r-- | libavcodec/g722.c | 2 | ||||
-rw-r--r-- | libavcodec/h264_loopfilter.c | 51 | ||||
-rw-r--r-- | libavcodec/libx264.c | 3 | ||||
-rw-r--r-- | libavcodec/sgidec.c | 123 | ||||
-rw-r--r-- | libavcodec/targa.c | 33 | ||||
-rw-r--r-- | libavfilter/vf_fade.c | 2 | ||||
-rw-r--r-- | libswscale/utils.c | 2 | ||||
-rw-r--r-- | libswscale/x86/output.asm | 117 | ||||
-rw-r--r-- | tests/fate/image.mak | 6 | ||||
-rw-r--r-- | tests/ref/fate/targa-conformance-CCM8 | 1 | ||||
-rw-r--r-- | tests/ref/fate/targa-conformance-UCM8 | 1 |
12 files changed, 231 insertions, 146 deletions
diff --git a/libavcodec/bytestream.h b/libavcodec/bytestream.h index 4e17e9d9f7..71c70aac84 100644 --- a/libavcodec/bytestream.h +++ b/libavcodec/bytestream.h @@ -75,6 +75,42 @@ DEF (byte, 1, AV_RB8 , AV_WB8 ) #undef DEF64 #undef DEF_T +#if HAVE_BIGENDIAN +# define bytestream2_get_ne16 bytestream2_get_be16 +# define bytestream2_get_ne24 bytestream2_get_be24 +# define bytestream2_get_ne32 bytestream2_get_be32 +# define bytestream2_get_ne64 bytestream2_get_be64 +# define bytestream2_get_ne16u bytestream2_get_be16u +# define bytestream2_get_ne24u bytestream2_get_be24u +# define bytestream2_get_ne32u bytestream2_get_be32u +# define bytestream2_get_ne64u bytestream2_get_be64u +# define bytestream2_put_ne16 bytestream2_put_be16 +# define bytestream2_put_ne24 bytestream2_put_be24 +# define bytestream2_put_ne32 bytestream2_put_be32 +# define bytestream2_put_ne64 bytestream2_put_be64 +# define bytestream2_peek_ne16 bytestream2_peek_be16 +# define bytestream2_peek_ne24 bytestream2_peek_be24 +# define bytestream2_peek_ne32 bytestream2_peek_be32 +# define bytestream2_peek_ne64 bytestream2_peek_be64 +#else +# define bytestream2_get_ne16 bytestream2_get_le16 +# define bytestream2_get_ne24 bytestream2_get_le24 +# define bytestream2_get_ne32 bytestream2_get_le32 +# define bytestream2_get_ne64 bytestream2_get_le64 +# define bytestream2_get_ne16u bytestream2_get_le16u +# define bytestream2_get_ne24u bytestream2_get_le24u +# define bytestream2_get_ne32u bytestream2_get_le32u +# define bytestream2_get_ne64u bytestream2_get_le64u +# define bytestream2_put_ne16 bytestream2_put_le16 +# define bytestream2_put_ne24 bytestream2_put_le24 +# define bytestream2_put_ne32 bytestream2_put_le32 +# define bytestream2_put_ne64 bytestream2_put_le64 +# define bytestream2_peek_ne16 bytestream2_peek_le16 +# define bytestream2_peek_ne24 bytestream2_peek_le24 +# define bytestream2_peek_ne32 bytestream2_peek_le32 +# define bytestream2_peek_ne64 bytestream2_peek_le64 +#endif + static av_always_inline void bytestream2_init(GetByteContext *g, const uint8_t *buf, int buf_size) { diff --git a/libavcodec/g722.c b/libavcodec/g722.c index e8e74242b8..2c04c40b56 100644 --- a/libavcodec/g722.c +++ b/libavcodec/g722.c @@ -129,7 +129,7 @@ static void do_adaptive_prediction(struct G722Band *band, const int cur_diff) band->prev_qtzd_reconst = cur_qtzd_reconst; } -static int inline linear_scale_factor(const int log_factor) +static inline int linear_scale_factor(const int log_factor) { const int wd1 = inv_log2_table[(log_factor >> 6) & 31]; const int shift = log_factor >> 11; diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 67399395a1..f3a5ff6783 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -101,7 +101,11 @@ static const uint8_t tc0_table[52*3][4] = { }; /* intra: 0 if this loopfilter call is guaranteed to be inter (bS < 4), 1 if it might be intra (bS == 4) */ -static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) { +static av_always_inline void filter_mb_edgev(uint8_t *pix, int stride, + const int16_t bS[4], + unsigned int qp, int a, int b, + H264Context *h, int intra) +{ const unsigned int index_a = qp + a; const int alpha = alpha_table[index_a]; const int beta = beta_table[qp + b]; @@ -118,7 +122,12 @@ static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, const in h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); } } -static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) { + +static av_always_inline void filter_mb_edgecv(uint8_t *pix, int stride, + const int16_t bS[4], + unsigned int qp, int a, int b, + H264Context *h, int intra) +{ const unsigned int index_a = qp + a; const int alpha = alpha_table[index_a]; const int beta = beta_table[qp + b]; @@ -136,7 +145,12 @@ static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, const i } } -static void av_always_inline filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, const int16_t bS[7], int bsi, int qp, int a, int b, int intra ) { +static av_always_inline void filter_mb_mbaff_edgev(H264Context *h, uint8_t *pix, + int stride, + const int16_t bS[7], int bsi, + int qp, int a, int b, + int intra) +{ const unsigned int index_a = qp + a; const int alpha = alpha_table[index_a]; const int beta = beta_table[qp + b]; @@ -153,7 +167,13 @@ static void av_always_inline filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta); } } -static void av_always_inline filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, const int16_t bS[7], int bsi, int qp, int a, int b, int intra ) { + +static av_always_inline void filter_mb_mbaff_edgecv(H264Context *h, + uint8_t *pix, int stride, + const int16_t bS[7], + int bsi, int qp, int a, + int b, int intra) +{ const unsigned int index_a = qp + a; const int alpha = alpha_table[index_a]; const int beta = beta_table[qp + b]; @@ -171,7 +191,11 @@ static void av_always_inline filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pi } } -static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) { +static av_always_inline void filter_mb_edgeh(uint8_t *pix, int stride, + const int16_t bS[4], + unsigned int qp, int a, int b, + H264Context *h, int intra) +{ const unsigned int index_a = qp + a; const int alpha = alpha_table[index_a]; const int beta = beta_table[qp + b]; @@ -189,7 +213,11 @@ static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, const in } } -static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) { +static av_always_inline void filter_mb_edgech(uint8_t *pix, int stride, + const int16_t bS[4], + unsigned int qp, int a, int b, + H264Context *h, int intra) +{ const unsigned int index_a = qp + a; const int alpha = alpha_table[index_a]; const int beta = beta_table[qp + b]; @@ -207,8 +235,15 @@ static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, const i } } -static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, - unsigned int linesize, unsigned int uvlinesize, int pixel_shift) { +static av_always_inline void h264_filter_mb_fast_internal(H264Context *h, + int mb_x, int mb_y, + uint8_t *img_y, + uint8_t *img_cb, + uint8_t *img_cr, + unsigned int linesize, + unsigned int uvlinesize, + int pixel_shift) +{ MpegEncContext * const s = &h->s; int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); int chroma444 = CHROMA444; diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index f23b3fa9ae..59257f0199 100644 --- a/libavcodec/libx264.c +++ b/libavcodec/libx264.c @@ -532,6 +532,9 @@ static av_cold int X264_init(AVCodecContext *avctx) // update AVCodecContext with x264 parameters avctx->has_b_frames = x4->params.i_bframe ? x4->params.i_bframe_pyramid ? 2 : 1 : 0; + if (avctx->max_b_frames < 0) + avctx->max_b_frames = 0; + avctx->bit_rate = x4->params.rc.i_bitrate*1000; #if FF_API_X264_GLOBAL_OPTS avctx->crf = x4->params.rc.f_rf_constant; diff --git a/libavcodec/sgidec.c b/libavcodec/sgidec.c index b84949daff..6a98718131 100644 --- a/libavcodec/sgidec.c +++ b/libavcodec/sgidec.c @@ -32,26 +32,27 @@ typedef struct SgiState { unsigned int depth; unsigned int bytes_per_channel; int linesize; + GetByteContext g; } SgiState; /** * Expand an RLE row into a channel. - * @param in_buf input buffer - * @param in_end end of input buffer + * @param s the current image state * @param out_buf Points to one line after the output buffer. * @param out_end end of line in output buffer * @param pixelstride pixel stride of input buffer * @return size of output in bytes, -1 if buffer overflows */ -static int expand_rle_row(const uint8_t *in_buf, const uint8_t* in_end, - unsigned char *out_buf, uint8_t* out_end, int pixelstride) +static int expand_rle_row(SgiState *s, uint8_t *out_buf, + uint8_t *out_end, int pixelstride) { unsigned char pixel, count; unsigned char *orig = out_buf; while (1) { - if(in_buf + 1 > in_end) return -1; - pixel = bytestream_get_byte(&in_buf); + if (bytestream2_get_bytes_left(&s->g) < 1) + return AVERROR_INVALIDDATA; + pixel = bytestream2_get_byteu(&s->g); if (!(count = (pixel & 0x7f))) { return (out_buf - orig) / pixelstride; } @@ -61,11 +62,11 @@ static int expand_rle_row(const uint8_t *in_buf, const uint8_t* in_end, if (pixel & 0x80) { while (count--) { - *out_buf = bytestream_get_byte(&in_buf); + *out_buf = bytestream2_get_byte(&s->g); out_buf += pixelstride; } } else { - pixel = bytestream_get_byte(&in_buf); + pixel = bytestream2_get_byte(&s->g); while (count--) { *out_buf = pixel; @@ -78,85 +79,73 @@ static int expand_rle_row(const uint8_t *in_buf, const uint8_t* in_end, /** * Read a run length encoded SGI image. * @param out_buf output buffer - * @param in_buf input buffer - * @param in_end end of input buffer * @param s the current image state * @return 0 if no error, else return error number. */ -static int read_rle_sgi(unsigned char* out_buf, const uint8_t *in_buf, - const uint8_t *in_end, SgiState* s) +static int read_rle_sgi(uint8_t *out_buf, SgiState *s) { uint8_t *dest_row; unsigned int len = s->height * s->depth * 4; - const uint8_t *start_table = in_buf; + GetByteContext g_table = s->g; unsigned int y, z; unsigned int start_offset; /* size of RLE offset and length tables */ - if(len * 2 > in_end - in_buf) { + if (len * 2 > bytestream2_get_bytes_left(&s->g)) { return AVERROR_INVALIDDATA; } - in_buf -= SGI_HEADER_SIZE; for (z = 0; z < s->depth; z++) { dest_row = out_buf; for (y = 0; y < s->height; y++) { dest_row -= s->linesize; - start_offset = bytestream_get_be32(&start_table); - if(start_offset > in_end - in_buf) { + start_offset = bytestream2_get_be32(&g_table); + bytestream2_seek(&s->g, start_offset, SEEK_SET); + if (expand_rle_row(s, dest_row + z, dest_row + FFABS(s->linesize), + s->depth) != s->width) { return AVERROR_INVALIDDATA; } - if (expand_rle_row(in_buf + start_offset, in_end, dest_row + z, - dest_row + FFABS(s->linesize), s->depth) != s->width) - return AVERROR_INVALIDDATA; } } return 0; } -static av_always_inline void copy_loop(uint8_t *out_buf, const uint8_t *in_buf, - unsigned offset, unsigned bytes_per_channel, - SgiState *s) -{ - int x, y, z; - for (y = s->height - 1; y >= 0; y--) { - uint8_t *line = out_buf + (y * s->linesize); - for (x = s->width; x > 0; x--) { - const uint8_t *ptr = in_buf; - in_buf += bytes_per_channel; - for(z = 0; z < s->depth; z ++) { - memcpy(line, ptr, bytes_per_channel); - line += bytes_per_channel; - ptr += offset; - } - } - } -} - /** * Read an uncompressed SGI image. * @param out_buf output buffer * @param out_end end ofoutput buffer - * @param in_buf input buffer - * @param in_end end of input buffer * @param s the current image state * @return 0 if read success, otherwise return -1. */ static int read_uncompressed_sgi(unsigned char* out_buf, uint8_t* out_end, - const uint8_t *in_buf, const uint8_t *in_end, SgiState* s) + SgiState *s) { + int x, y, z; unsigned int offset = s->height * s->width * s->bytes_per_channel; + GetByteContext gp[4]; /* Test buffer size. */ - if (offset * s->depth > in_end - in_buf) { - return -1; + if (offset * s->depth > bytestream2_get_bytes_left(&s->g)) + return AVERROR_INVALIDDATA; + + /* Create a reader for each plane */ + for (z = 0; z < s->depth; z++) { + gp[z] = s->g; + bytestream2_skip(&gp[z], z * offset); } - if (s->bytes_per_channel == 2) { - copy_loop(out_buf, in_buf, offset, 2, s); - } else { - av_assert1(s->bytes_per_channel == 1); - copy_loop(out_buf, in_buf, offset, 1, s); + for (y = s->height - 1; y >= 0; y--) { + out_end = out_buf + (y * s->linesize); + if (s->bytes_per_channel == 1) { + for (x = s->width; x > 0; x--) + for (z = 0; z < s->depth; z++) + *out_end++ = bytestream2_get_byteu(&gp[z]); + } else { + uint16_t *out16 = (uint16_t *)out_end; + for (x = s->width; x > 0; x--) + for (z = 0; z < s->depth; z++) + *out16++ = bytestream2_get_ne16u(&gp[z]); + } } return 0; } @@ -165,33 +154,31 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { - const uint8_t *in_buf = avpkt->data; - int buf_size = avpkt->size; SgiState *s = avctx->priv_data; AVFrame *picture = data; AVFrame *p = &s->picture; - const uint8_t *in_end = in_buf + buf_size; unsigned int dimension, rle; int ret = 0; uint8_t *out_buf, *out_end; - if (buf_size < SGI_HEADER_SIZE){ - av_log(avctx, AV_LOG_ERROR, "buf_size too small (%d)\n", buf_size); - return -1; + bytestream2_init(&s->g, avpkt->data, avpkt->size); + if (bytestream2_get_bytes_left(&s->g) < SGI_HEADER_SIZE) { + av_log(avctx, AV_LOG_ERROR, "buf_size too small (%d)\n", avpkt->size); + return AVERROR_INVALIDDATA; } /* Test for SGI magic. */ - if (bytestream_get_be16(&in_buf) != SGI_MAGIC) { + if (bytestream2_get_be16(&s->g) != SGI_MAGIC) { av_log(avctx, AV_LOG_ERROR, "bad magic number\n"); - return -1; + return AVERROR_INVALIDDATA; } - rle = bytestream_get_byte(&in_buf); - s->bytes_per_channel = bytestream_get_byte(&in_buf); - dimension = bytestream_get_be16(&in_buf); - s->width = bytestream_get_be16(&in_buf); - s->height = bytestream_get_be16(&in_buf); - s->depth = bytestream_get_be16(&in_buf); + rle = bytestream2_get_byte(&s->g); + s->bytes_per_channel = bytestream2_get_byte(&s->g); + dimension = bytestream2_get_be16(&s->g); + s->width = bytestream2_get_be16(&s->g); + s->height = bytestream2_get_be16(&s->g); + s->depth = bytestream2_get_be16(&s->g); if (s->bytes_per_channel != 1 && (s->bytes_per_channel != 2 || rle)) { av_log(avctx, AV_LOG_ERROR, "wrong channel number\n"); @@ -237,19 +224,19 @@ static int decode_frame(AVCodecContext *avctx, s->linesize = p->linesize[0]; /* Skip header. */ - in_buf += SGI_HEADER_SIZE - 12; + bytestream2_seek(&s->g, SGI_HEADER_SIZE, SEEK_SET); if (rle) { - ret = read_rle_sgi(out_end, in_buf, in_end, s); + ret = read_rle_sgi(out_end, s); } else { - ret = read_uncompressed_sgi(out_buf, out_end, in_buf, in_end, s); + ret = read_uncompressed_sgi(out_buf, out_end, s); } if (ret == 0) { *picture = s->picture; *data_size = sizeof(AVPicture); - return buf_size; + return avpkt->size; } else { - return -1; + return ret; } } diff --git a/libavcodec/targa.c b/libavcodec/targa.c index 4ab560d960..57a4fee22b 100644 --- a/libavcodec/targa.c +++ b/libavcodec/targa.c @@ -178,24 +178,45 @@ static int decode_frame(AVCodecContext *avctx, } if(colors){ - size_t pal_size; + int pal_size, pal_sample_size; if((colors + first_clr) > 256){ av_log(avctx, AV_LOG_ERROR, "Incorrect palette: %i colors with offset %i\n", colors, first_clr); return -1; } - if(csize != 24){ + switch (csize) { + case 24: pal_sample_size = 3; break; + case 16: + case 15: pal_sample_size = 2; break; + default: av_log(avctx, AV_LOG_ERROR, "Palette entry size %i bits is not supported\n", csize); return -1; } - pal_size = colors * ((csize + 1) >> 3); + pal_size = colors * pal_sample_size; CHECK_BUFFER_SIZE(buf, buf_end, pal_size, "color table"); if(avctx->pix_fmt != PIX_FMT_PAL8)//should not occur but skip palette anyway buf += pal_size; else{ int t; - int32_t *pal = ((int32_t*)p->data[1]) + first_clr; - for(t = 0; t < colors; t++){ - *pal++ = (0xff<<24) | bytestream_get_le24(&buf); + uint32_t *pal = ((uint32_t *)p->data[1]) + first_clr; + + switch (pal_sample_size) { + case 3: + /* RGB24 */ + for (t = 0; t < colors; t++) + *pal++ = (0xffU<<24) | bytestream_get_le24(&buf); + break; + case 2: + /* RGB555 */ + for (t = 0; t < colors; t++) { + uint32_t v = bytestream_get_le16(&buf); + v = ((v & 0x7C00) << 9) | + ((v & 0x03E0) << 6) | + ((v & 0x001F) << 3); + /* left bit replication */ + v |= (v & 0xE0E0E0U) >> 5; + *pal++ = (0xffU<<24) | v; + } + break; } p->palette_has_changed = 1; } diff --git a/libavfilter/vf_fade.c b/libavfilter/vf_fade.c index 68cb567571..ed499081af 100644 --- a/libavfilter/vf_fade.c +++ b/libavfilter/vf_fade.c @@ -149,7 +149,7 @@ static av_cold void uninit(AVFilterContext *ctx) static int query_formats(AVFilterContext *ctx) { - const static enum PixelFormat pix_fmts[] = { + static const enum PixelFormat pix_fmts[] = { PIX_FMT_YUV444P, PIX_FMT_YUV422P, PIX_FMT_YUV420P, PIX_FMT_YUV411P, PIX_FMT_YUV410P, PIX_FMT_YUVJ444P, PIX_FMT_YUVJ422P, PIX_FMT_YUVJ420P, diff --git a/libswscale/utils.c b/libswscale/utils.c index 13eb07512e..46ebb7f647 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -72,7 +72,7 @@ typedef struct FormatEntry { int is_supported_in, is_supported_out; } FormatEntry; -const static FormatEntry format_entries[PIX_FMT_NB] = { +static const FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_YUV420P] = { 1 , 1 }, [PIX_FMT_YUYV422] = { 1 , 1 }, [PIX_FMT_RGB24] = { 1 , 1 }, diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index ff72677b9a..c05fc0338d 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -56,7 +56,7 @@ SECTION .text ; of 2. $offset is either 0 or 3. $dither holds 8 values. ;----------------------------------------------------------------------------- -%macro yuv2planeX_fn 4 +%macro yuv2planeX_fn 3 %ifdef ARCH_X86_32 %define cntr_reg r1 @@ -66,12 +66,12 @@ SECTION .text %define movsx movsxd %endif -cglobal yuv2planeX_%2_%1, %4, 7, %3 -%if %2 == 8 || %2 == 9 || %2 == 10 +cglobal yuv2planeX_%1, %3, 7, %2 +%if %1 == 8 || %1 == 9 || %1 == 10 pxor m6, m6 -%endif ; %2 == 8/9/10 +%endif ; %1 == 8/9/10 -%if %2 == 8 +%if %1 == 8 %ifdef ARCH_X86_32 %assign pad 0x2c - (stack_offset & 15) SUB rsp, pad @@ -120,7 +120,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3 mova [rsp+16], m3 mova [rsp+24], m_dith %endif ; mmsize == 8/16 -%endif ; %2 == 8 +%endif ; %1 == 8 xor r5, r5 @@ -130,11 +130,11 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3 ; 8 pixels but we can only handle 2 pixels per register, and thus 4 ; pixels per iteration. In order to not have to keep track of where ; we are w.r.t. dithering, we unroll the mmx/8bit loop x2. -%if %2 == 8 +%if %1 == 8 %rep 16/mmsize -%endif ; %2 == 8 +%endif ; %1 == 8 -%if %2 == 8 +%if %1 == 8 %ifdef ARCH_X86_32 mova m2, [rsp+mmsize*(0+%%i)] mova m1, [rsp+mmsize*(1+%%i)] @@ -142,31 +142,31 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3 mova m2, m8 mova m1, m_dith %endif ; x86-32/64 -%else ; %2 == 9/10/16 - mova m1, [yuv2yuvX_%2_start] +%else ; %1 == 9/10/16 + mova m1, [yuv2yuvX_%1_start] mova m2, m1 -%endif ; %2 == 8/9/10/16 +%endif ; %1 == 8/9/10/16 movsx cntr_reg, r1m .filterloop_ %+ %%i: ; input pixels mov r6, [r2+gprsize*cntr_reg-2*gprsize] -%if %2 == 16 +%if %1 == 16 mova m3, [r6+r5*4] mova m5, [r6+r5*4+mmsize] -%else ; %2 == 8/9/10 +%else ; %1 == 8/9/10 mova m3, [r6+r5*2] -%endif ; %2 == 8/9/10/16 +%endif ; %1 == 8/9/10/16 mov r6, [r2+gprsize*cntr_reg-gprsize] -%if %2 == 16 +%if %1 == 16 mova m4, [r6+r5*4] mova m6, [r6+r5*4+mmsize] -%else ; %2 == 8/9/10 +%else ; %1 == 8/9/10 mova m4, [r6+r5*2] -%endif ; %2 == 8/9/10/16 +%endif ; %1 == 8/9/10/16 ; coefficients movd m0, [r0+2*cntr_reg-4]; coeff[0], coeff[1] -%if %2 == 16 +%if %1 == 16 pshuflw m7, m0, 0 ; coeff[0] pshuflw m0, m0, 0x55 ; coeff[1] pmovsxwd m7, m7 ; word -> dword @@ -181,7 +181,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3 paddd m1, m5 paddd m2, m4 paddd m1, m6 -%else ; %2 == 10/9/8 +%else ; %1 == 10/9/8 punpcklwd m5, m3, m4 punpckhwd m3, m4 SPLATD m0, m0 @@ -191,85 +191,84 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3 paddd m2, m5 paddd m1, m3 -%endif ; %2 == 8/9/10/16 +%endif ; %1 == 8/9/10/16 sub cntr_reg, 2 jg .filterloop_ %+ %%i -%if %2 == 16 - psrad m2, 31 - %2 - psrad m1, 31 - %2 -%else ; %2 == 10/9/8 - psrad m2, 27 - %2 - psrad m1, 27 - %2 -%endif ; %2 == 8/9/10/16 +%if %1 == 16 + psrad m2, 31 - %1 + psrad m1, 31 - %1 +%else ; %1 == 10/9/8 + psrad m2, 27 - %1 + psrad m1, 27 - %1 +%endif ; %1 == 8/9/10/16 -%if %2 == 8 +%if %1 == 8 packssdw m2, m1 packuswb m2, m2 movh [r3+r5*1], m2 -%else ; %2 == 9/10/16 -%if %2 == 16 +%else ; %1 == 9/10/16 +%if %1 == 16 packssdw m2, m1 paddw m2, [minshort] -%else ; %2 == 9/10 -%ifidn %1, sse4 - packusdw m2, m1 -%elifidn %1, avx +%else ; %1 == 9/10 +%if cpuflag(sse4) packusdw m2, m1 %else ; mmx2/sse2 packssdw m2, m1 pmaxsw m2, m6 %endif ; mmx2/sse2/sse4/avx - pminsw m2, [yuv2yuvX_%2_upper] -%endif ; %2 == 9/10/16 + pminsw m2, [yuv2yuvX_%1_upper] +%endif ; %1 == 9/10/16 mova [r3+r5*2], m2 -%endif ; %2 == 8/9/10/16 +%endif ; %1 == 8/9/10/16 add r5, mmsize/2 sub r4d, mmsize/2 -%if %2 == 8 +%if %1 == 8 %assign %%i %%i+2 %endrep -%endif ; %2 == 8 +%endif ; %1 == 8 jg .pixelloop -%if %2 == 8 +%if %1 == 8 %ifdef ARCH_X86_32 ADD rsp, pad RET %else ; x86-64 REP_RET %endif ; x86-32/64 -%else ; %2 == 9/10/16 +%else ; %1 == 9/10/16 REP_RET -%endif ; %2 == 8/9/10/16 +%endif ; %1 == 8/9/10/16 %endmacro %define PALIGNR PALIGNR_MMX %ifdef ARCH_X86_32 -INIT_MMX -yuv2planeX_fn mmx2, 8, 0, 7 -yuv2planeX_fn mmx2, 9, 0, 5 -yuv2planeX_fn mmx2, 10, 0, 5 +INIT_MMX mmx2 +yuv2planeX_fn 8, 0, 7 +yuv2planeX_fn 9, 0, 5 +yuv2planeX_fn 10, 0, 5 %endif -INIT_XMM -yuv2planeX_fn sse2, 8, 10, 7 -yuv2planeX_fn sse2, 9, 7, 5 -yuv2planeX_fn sse2, 10, 7, 5 +INIT_XMM sse2 +yuv2planeX_fn 8, 10, 7 +yuv2planeX_fn 9, 7, 5 +yuv2planeX_fn 10, 7, 5 %define PALIGNR PALIGNR_SSSE3 -yuv2planeX_fn sse4, 8, 10, 7 -yuv2planeX_fn sse4, 9, 7, 5 -yuv2planeX_fn sse4, 10, 7, 5 -yuv2planeX_fn sse4, 16, 8, 5 +INIT_XMM sse4 +yuv2planeX_fn 8, 10, 7 +yuv2planeX_fn 9, 7, 5 +yuv2planeX_fn 10, 7, 5 +yuv2planeX_fn 16, 8, 5 %ifdef HAVE_AVX -INIT_AVX -yuv2planeX_fn avx, 8, 10, 7 -yuv2planeX_fn avx, 9, 7, 5 -yuv2planeX_fn avx, 10, 7, 5 +INIT_XMM avx +yuv2planeX_fn 8, 10, 7 +yuv2planeX_fn 9, 7, 5 +yuv2planeX_fn 10, 7, 5 %endif ; %1=outout-bpc, %2=alignment (u/a) diff --git a/tests/fate/image.mak b/tests/fate/image.mak index eb3ff957d4..4c0d361f28 100644 --- a/tests/fate/image.mak +++ b/tests/fate/image.mak @@ -34,10 +34,12 @@ FATE_TESTS += $(FATE_IMAGE) fate-image: $(FATE_IMAGE) FATE_TARGA = CBW8 \ + CCM8 \ CTC16 \ CTC24 \ CTC32 \ UBW8 \ + UCM8 \ UTC16 \ UTC24 \ UTC32 @@ -49,12 +51,12 @@ FATE_TESTS += $(FATE_TARGA) fate-targa: $(FATE_TARGA) fate-targa-conformance-CBW8: CMD = framecrc -i $(SAMPLES)/targa-conformance/CBW8.TGA -# fate-targa-conformance-CCM8: CMD = framecrc -i $(SAMPLES)/targa-conformance/CCM8.TGA +fate-targa-conformance-CCM8: CMD = framecrc -i $(SAMPLES)/targa-conformance/CCM8.TGA -pix_fmt rgba fate-targa-conformance-CTC16: CMD = framecrc -i $(SAMPLES)/targa-conformance/CTC16.TGA -pix_fmt rgb555le fate-targa-conformance-CTC24: CMD = framecrc -i $(SAMPLES)/targa-conformance/CTC24.TGA fate-targa-conformance-CTC32: CMD = framecrc -i $(SAMPLES)/targa-conformance/CTC32.TGA -pix_fmt bgra fate-targa-conformance-UBW8: CMD = framecrc -i $(SAMPLES)/targa-conformance/UBW8.TGA -# fate-targa-conformance-UCM8: CMD = framecrc -i $(SAMPLES)/targa-conformance/UCM8.TGA +fate-targa-conformance-UCM8: CMD = framecrc -i $(SAMPLES)/targa-conformance/UCM8.TGA -pix_fmt rgba fate-targa-conformance-UTC16: CMD = framecrc -i $(SAMPLES)/targa-conformance/UTC16.TGA -pix_fmt rgb555le fate-targa-conformance-UTC24: CMD = framecrc -i $(SAMPLES)/targa-conformance/UTC24.TGA fate-targa-conformance-UTC32: CMD = framecrc -i $(SAMPLES)/targa-conformance/UTC32.TGA -pix_fmt bgra diff --git a/tests/ref/fate/targa-conformance-CCM8 b/tests/ref/fate/targa-conformance-CCM8 new file mode 100644 index 0000000000..92f105e0d3 --- /dev/null +++ b/tests/ref/fate/targa-conformance-CCM8 @@ -0,0 +1 @@ +0, 0, 65536, 0x47e97fe9 diff --git a/tests/ref/fate/targa-conformance-UCM8 b/tests/ref/fate/targa-conformance-UCM8 new file mode 100644 index 0000000000..92f105e0d3 --- /dev/null +++ b/tests/ref/fate/targa-conformance-UCM8 @@ -0,0 +1 @@ +0, 0, 65536, 0x47e97fe9 |