diff options
| field | value | date |
|---|---|---|
| author | Michael Niedermayer <michaelni@gmx.at> | 2012-09-04 17:04:51 +0200 |
| committer | Michael Niedermayer <michaelni@gmx.at> | 2012-09-04 17:04:51 +0200 |
| commit | 9dcc4c30f9d8ef70d3c07b4a77fdc507e8766107 (patch) | |
| tree | ae6bb70dcf614bfbf032b5fe4ac4f712777cd0c3 | |
| parent | 9de7622927b1b0ec6f8045b17b3116f046f44b87 (diff) | |
| parent | b36f87ff90d87687f574d51385f47bb98d14600a (diff) | |
| download | ffmpeg-9dcc4c30f9d8ef70d3c07b4a77fdc507e8766107.tar.gz | |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
configure: add support for bdver1 and bdver2 CPU types.
avio: make avio_close NULL the freed buffer
pixdesc: cosmetics
proresenc: Don't free a buffer not owned by the codec
proresenc: Write the full value in one put_bits call
adpcmenc: Calculate the IMA_QT predictor without overflow
x86: Add convenience macros to check for CPU extensions and flags
x86: h264dsp: drop some unnecessary ifdefs around prototype declarations
mss12: merge decode_pixel() and decode_top_left_pixel()
mss12: reduce SliceContext size from 1067 to 164 KB
mss12: move SliceContexts out of the common context into the codec contexts
Conflicts:
libavformat/aviobuf.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rwxr-xr-x | configure | 4 |
-rw-r--r-- | libavcodec/mss1.c | 11 |
-rw-r--r-- | libavcodec/mss12.c | 177 |
-rw-r--r-- | libavcodec/mss12.h | 18 |
-rw-r--r-- | libavcodec/mss2.c | 16 |
-rw-r--r-- | libavcodec/x86/h264dsp_init.c | 6 |
-rw-r--r-- | libavformat/aviobuf.c | 2 |
-rw-r--r-- | libavutil/pixdesc.c | 18 |
-rw-r--r-- | libavutil/x86/cpu.h | 57 |
9 files changed, 169 insertions, 140 deletions
@@ -2404,7 +2404,7 @@ suncc_flags(){ prescott|nocona) echo -xarch=sse3 -xchip=pentium4 ;; *-sse3) echo -xarch=sse3 ;; core2) echo -xarch=ssse3 -xchip=core2 ;; - amdfam10|barcelona) echo -xarch=sse4_1 ;; + amdfam10|barcelona|bdver*) echo -xarch=sse4_1 ;; athlon-4|athlon-[mx]p) echo -xarch=ssea ;; k8|opteron|athlon64|athlon-fx) echo -xarch=sse2a ;; @@ -2773,7 +2773,7 @@ elif enabled x86; then disable cmov ;; # targets that do support conditional mov (cmov) - i686|pentiumpro|pentium[23]|pentium-m|athlon|athlon-tbird|athlon-4|athlon-[mx]p|athlon64*|k8*|opteron*|athlon-fx|core2|amdfam10|barcelona|atom) + i686|pentiumpro|pentium[23]|pentium-m|athlon|athlon-tbird|athlon-4|athlon-[mx]p|athlon64*|k8*|opteron*|athlon-fx|core2|amdfam10|barcelona|atom|bdver*) cpuflags="-march=$cpu" enable cmov enable fast_cmov diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c index d622e2ca61..c90485b512 100644 --- a/libavcodec/mss1.c +++ b/libavcodec/mss1.c @@ -30,7 +30,7 @@ typedef struct MSS1Context { MSS12Context ctx; AVFrame pic; - SliceContext sc[2]; + SliceContext sc; } MSS1Context; static void arith_normalise(ArithCoder *c) @@ -89,7 +89,7 @@ static int arith_get_number(ArithCoder *c, int mod_val) return val; } -static int arith_get_prob(ArithCoder *c, int *probs) +static int arith_get_prob(ArithCoder *c, int16_t *probs) { int range = c->high - c->low + 1; int val = ((c->value - c->low + 1) * probs[0] - 1) / range; @@ -162,7 +162,8 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, c->pal_stride = -ctx->pic.linesize[0]; c->keyframe = !arith_get_bit(&acoder); if (c->keyframe) { - ff_mss12_codec_reset(c); + c->corrupted = 0; + ff_mss12_slicecontext_reset(&ctx->sc); pal_changed = decode_pal(c, &acoder); ctx->pic.key_frame = 1; ctx->pic.pict_type = AV_PICTURE_TYPE_I; @@ -172,7 +173,7 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ctx->pic.key_frame = 0; ctx->pic.pict_type = AV_PICTURE_TYPE_P; } - c->corrupted = 
ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0, + c->corrupted = ff_mss12_decode_rect(&ctx->sc, &acoder, 0, 0, avctx->width, avctx->height); if (c->corrupted) return AVERROR_INVALIDDATA; @@ -194,7 +195,7 @@ static av_cold int mss1_decode_init(AVCodecContext *avctx) c->ctx.avctx = avctx; avctx->coded_frame = &c->pic; - ret = ff_mss12_decode_init(&c->ctx, 0); + ret = ff_mss12_decode_init(&c->ctx, 0, &c->sc, NULL); avctx->pix_fmt = PIX_FMT_PAL8; diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c index 9283470884..b8df225822 100644 --- a/libavcodec/mss12.c +++ b/libavcodec/mss12.c @@ -61,13 +61,9 @@ static void model_reset(Model *m) m->weights[i] = 1; m->cum_prob[i] = m->num_syms - i; } - m->weights[0] = -1; - m->idx2sym[0] = -1; - m->sym2idx[m->num_syms] = -1; - for (i = 0; i < m->num_syms; i++) { - m->sym2idx[i] = i + 1; + m->weights[0] = 0; + for (i = 0; i < m->num_syms; i++) m->idx2sym[i + 1] = i; - } } static av_cold void model_init(Model *m, int num_syms, int thr_weight) @@ -75,7 +71,6 @@ static av_cold void model_init(Model *m, int num_syms, int thr_weight) m->num_syms = num_syms; m->thr_weight = thr_weight; m->threshold = num_syms * thr_weight; - model_reset(m); } static void model_rescale_weights(Model *m) @@ -109,8 +104,6 @@ void ff_mss12_model_update(Model *m, int val) m->idx2sym[val] = sym2; m->idx2sym[i] = sym1; - m->sym2idx[sym1] = i; - m->sym2idx[sym2] = val; val = i; } @@ -123,7 +116,7 @@ void ff_mss12_model_update(Model *m, int val) static void pixctx_reset(PixContext *ctx) { - int i, j, k; + int i, j; if (!ctx->special_initial_cache) for (i = 0; i < ctx->cache_size; i++) @@ -137,16 +130,15 @@ static void pixctx_reset(PixContext *ctx) model_reset(&ctx->cache_model); model_reset(&ctx->full_model); - for (i = 0; i < 4; i++) - for (j = 0; j < sec_order_sizes[i]; j++) - for (k = 0; k < 4; k++) - model_reset(&ctx->sec_models[i][j][k]); + for (i = 0; i < 15; i++) + for (j = 0; j < 4; j++) + model_reset(&ctx->sec_models[i][j]); } static av_cold void 
pixctx_init(PixContext *ctx, int cache_size, int full_model_syms, int special_initial_cache) { - int i, j, k; + int i, j, k, idx; ctx->cache_size = cache_size + 4; ctx->num_syms = cache_size; @@ -155,57 +147,36 @@ static av_cold void pixctx_init(PixContext *ctx, int cache_size, model_init(&ctx->cache_model, ctx->num_syms + 1, THRESH_LOW); model_init(&ctx->full_model, full_model_syms, THRESH_HIGH); - for (i = 0; i < 4; i++) - for (j = 0; j < sec_order_sizes[i]; j++) + for (i = 0, idx = 0; i < 4; i++) + for (j = 0; j < sec_order_sizes[i]; j++, idx++) for (k = 0; k < 4; k++) - model_init(&ctx->sec_models[i][j][k], 2 + i, + model_init(&ctx->sec_models[idx][k], 2 + i, i ? THRESH_LOW : THRESH_ADAPTIVE); } -static int decode_top_left_pixel(ArithCoder *acoder, PixContext *pctx) -{ - int i, val, pix; - - val = acoder->get_model_sym(acoder, &pctx->cache_model); - if (val < pctx->num_syms) { - pix = pctx->cache[val]; - } else { - pix = acoder->get_model_sym(acoder, &pctx->full_model); - for (i = 0; i < pctx->cache_size - 1; i++) - if (pctx->cache[i] == pix) - break; - val = i; - } - if (val) { - for (i = val; i > 0; i--) - pctx->cache[i] = pctx->cache[i - 1]; - pctx->cache[0] = pix; - } - - return pix; -} - -static int decode_pixel(ArithCoder *acoder, PixContext *pctx, - uint8_t *ngb, int num_ngb) +static av_always_inline int decode_pixel(ArithCoder *acoder, PixContext *pctx, + uint8_t *ngb, int num_ngb, int any_ngb) { int i, val, pix; val = acoder->get_model_sym(acoder, &pctx->cache_model); if (val < pctx->num_syms) { - int idx, j; - - idx = 0; - for (i = 0; i < pctx->cache_size; i++) { - for (j = 0; j < num_ngb; j++) - if (pctx->cache[i] == ngb[j]) - break; - if (j == num_ngb) { - if (idx == val) - break; - idx++; + if (any_ngb) { + int idx, j; + + idx = 0; + for (i = 0; i < pctx->cache_size; i++) { + for (j = 0; j < num_ngb; j++) + if (pctx->cache[i] == ngb[j]) + break; + if (j == num_ngb) { + if (idx == val) + break; + idx++; + } } + val = FFMIN(i, pctx->cache_size - 1); 
} - val = FFMIN(i, pctx->cache_size - 1); pix = pctx->cache[val]; } else { pix = acoder->get_model_sym(acoder, &pctx->full_model); @@ -268,50 +239,52 @@ static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx, switch (nlen) { case 1: - case 4: layer = 0; break; case 2: if (neighbours[TOP] == neighbours[TOP_LEFT]) { if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) - layer = 3; + layer = 1; else if (neighbours[LEFT] == neighbours[TOP_LEFT]) layer = 2; else - layer = 4; + layer = 3; } else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) { if (neighbours[LEFT] == neighbours[TOP_LEFT]) - layer = 1; + layer = 4; else layer = 5; } else if (neighbours[LEFT] == neighbours[TOP_LEFT]) { layer = 6; } else { - layer = 0; + layer = 7; } break; case 3: if (neighbours[TOP] == neighbours[TOP_LEFT]) - layer = 0; + layer = 8; else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) - layer = 1; + layer = 9; else if (neighbours[LEFT] == neighbours[TOP_LEFT]) - layer = 2; + layer = 10; else if (neighbours[TOP_RIGHT] == neighbours[TOP]) - layer = 3; + layer = 11; else if (neighbours[TOP] == neighbours[LEFT]) - layer = 4; + layer = 12; else - layer = 5; + layer = 13; + break; + case 4: + layer = 14; break; } pix = acoder->get_model_sym(acoder, - &pctx->sec_models[nlen - 1][layer][sub]); + &pctx->sec_models[layer][sub]); if (pix < nlen) return ref_pix[pix]; else - return decode_pixel(acoder, pctx, ref_pix, nlen); + return decode_pixel(acoder, pctx, ref_pix, nlen, 1); } static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic, @@ -326,7 +299,7 @@ static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic, for (j = 0; j < height; j++) { for (i = 0; i < width; i++) { if (!i && !j) - p = decode_top_left_pixel(acoder, pctx); + p = decode_pixel(acoder, pctx, NULL, 0, 0); else p = decode_pixel_in_context(acoder, pctx, dst + i, stride, i, j, width - i - 1); @@ -418,7 +391,7 @@ static int decode_region_masked(MSS12Context const *c, ArithCoder 
*acoder, return -1; } else if (mask[i] != 0x80) { if (!i && !j) - p = decode_top_left_pixel(acoder, pctx); + p = decode_pixel(acoder, pctx, NULL, 0, 0); else p = decode_pixel_in_context(acoder, pctx, dst + i, stride, i, j, width - i - 1); @@ -435,39 +408,30 @@ static int decode_region_masked(MSS12Context const *c, ArithCoder *acoder, return 0; } -static av_cold void codec_init(MSS12Context *c, int version) +static av_cold void slicecontext_init(SliceContext *sc, + int version, int full_model_syms) { - int i; - for (i = 0; i < (c->slice_split ? 2 : 1); i++) { - c->sc[i].c = c; - model_init(&c->sc[i].intra_region, 2, THRESH_ADAPTIVE); - model_init(&c->sc[i].inter_region, 2, THRESH_ADAPTIVE); - model_init(&c->sc[i].split_mode, 3, THRESH_HIGH); - model_init(&c->sc[i].edge_mode, 2, THRESH_HIGH); - model_init(&c->sc[i].pivot, 3, THRESH_LOW); - - pixctx_init(&c->sc[i].intra_pix_ctx, 8, c->full_model_syms, 0); - - pixctx_init(&c->sc[i].inter_pix_ctx, version ? 3 : 2, - c->full_model_syms, version ? 1 : 0); - } - c->corrupted = 1; + model_init(&sc->intra_region, 2, THRESH_ADAPTIVE); + model_init(&sc->inter_region, 2, THRESH_ADAPTIVE); + model_init(&sc->split_mode, 3, THRESH_HIGH); + model_init(&sc->edge_mode, 2, THRESH_HIGH); + model_init(&sc->pivot, 3, THRESH_LOW); + + pixctx_init(&sc->intra_pix_ctx, 8, full_model_syms, 0); + + pixctx_init(&sc->inter_pix_ctx, version ? 3 : 2, + full_model_syms, version ? 1 : 0); } -void ff_mss12_codec_reset(MSS12Context *c) +void ff_mss12_slicecontext_reset(SliceContext *sc) { - int i; - for (i = 0; i < (c->slice_split ? 
2 : 1); i++) { - model_reset(&c->sc[i].intra_region); - model_reset(&c->sc[i].inter_region); - model_reset(&c->sc[i].split_mode); - model_reset(&c->sc[i].edge_mode); - model_reset(&c->sc[i].pivot); - pixctx_reset(&c->sc[i].intra_pix_ctx); - pixctx_reset(&c->sc[i].inter_pix_ctx); - } - - c->corrupted = 0; + model_reset(&sc->intra_region); + model_reset(&sc->inter_region); + model_reset(&sc->split_mode); + model_reset(&sc->edge_mode); + model_reset(&sc->pivot); + pixctx_reset(&sc->intra_pix_ctx); + pixctx_reset(&sc->inter_pix_ctx); } static int decode_pivot(SliceContext *sc, ArithCoder *acoder, int base) @@ -505,7 +469,7 @@ static int decode_region_intra(SliceContext *sc, ArithCoder *acoder, uint8_t *dst = c->pal_pic + x + y * stride; uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * rgb_stride; - pix = decode_top_left_pixel(acoder, &sc->intra_pix_ctx); + pix = decode_pixel(acoder, &sc->intra_pix_ctx, NULL, 0, 0); rgb_pix = c->pal[pix]; for (i = 0; i < height; i++, dst += stride, rgb_dst += rgb_stride) { memset(dst, pix, width); @@ -531,7 +495,7 @@ static int decode_region_inter(SliceContext *sc, ArithCoder *acoder, mode = acoder->get_model_sym(acoder, &sc->inter_region); if (!mode) { - mode = decode_top_left_pixel(acoder, &sc->inter_pix_ctx); + mode = decode_pixel(acoder, &sc->inter_pix_ctx, NULL, 0, 0); if (c->avctx->err_recognition & AV_EF_EXPLODE && ( c->rgb_pic && mode != 0x01 && mode != 0x02 && mode != 0x04 || @@ -595,7 +559,8 @@ int ff_mss12_decode_rect(SliceContext *sc, ArithCoder *acoder, return 0; } -av_cold int ff_mss12_decode_init(MSS12Context *c, int version) +av_cold int ff_mss12_decode_init(MSS12Context *c, int version, + SliceContext* sc1, SliceContext *sc2) { AVCodecContext *avctx = c->avctx; int i; @@ -690,7 +655,13 @@ av_cold int ff_mss12_decode_init(MSS12Context *c, int version) return AVERROR(ENOMEM); } - codec_init(c, version); + sc1->c = c; + slicecontext_init(sc1, version, c->full_model_syms); + if (c->slice_split) { + sc2->c = c; + 
slicecontext_init(sc2, version, c->full_model_syms); + } + c->corrupted = 1; return 0; } diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h index 9068651e06..ba80030ee3 100644 --- a/libavcodec/mss12.h +++ b/libavcodec/mss12.h @@ -38,10 +38,9 @@ #define THRESH_HIGH 50 typedef struct Model { - int cum_prob[MODEL_MAX_SYMS + 1]; - int weights[MODEL_MAX_SYMS + 1]; - int idx2sym[MODEL_MAX_SYMS + 1]; - int sym2idx[MODEL_MAX_SYMS + 1]; + int16_t cum_prob[MODEL_MAX_SYMS + 1]; + int16_t weights[MODEL_MAX_SYMS + 1]; + uint8_t idx2sym[MODEL_MAX_SYMS + 1]; int num_syms; int thr_weight, threshold; } Model; @@ -60,7 +59,7 @@ typedef struct PixContext { int cache_size, num_syms; uint8_t cache[12]; Model cache_model, full_model; - Model sec_models[4][8][4]; + Model sec_models[15][4]; int special_initial_cache; } PixContext; @@ -86,21 +85,18 @@ typedef struct MSS12Context { int rgb_stride; int free_colours; int keyframe; - Model intra_region, inter_region; - Model pivot, edge_mode, split_mode; - PixContext intra_pix_ctx, inter_pix_ctx; int mvX, mvY; int corrupted; int slice_split; int full_model_syms; - SliceContext sc[2]; } MSS12Context; int ff_mss12_decode_rect(SliceContext *ctx, ArithCoder *acoder, int x, int y, int width, int height); void ff_mss12_model_update(Model *m, int val); -void ff_mss12_codec_reset(MSS12Context *ctx); -av_cold int ff_mss12_decode_init(MSS12Context *ctx, int version); +void ff_mss12_slicecontext_reset(SliceContext *sc); +av_cold int ff_mss12_decode_init(MSS12Context *c, int version, + SliceContext* sc1, SliceContext *sc2); av_cold int ff_mss12_decode_end(MSS12Context *ctx); #define ARITH_GET_BIT(VERSION) \ diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c index 886da05540..fc9ebff4d7 100644 --- a/libavcodec/mss2.c +++ b/libavcodec/mss2.c @@ -106,7 +106,7 @@ static int arith2_get_number(ArithCoder *c, int n) return val; } -static int arith2_get_prob(ArithCoder *c, int *probs) +static int arith2_get_prob(ArithCoder *c, int16_t *probs) { int range = 
c->high - c->low + 1, n = *probs; int scale = av_log2(range) - av_log2(n); @@ -671,14 +671,18 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size, buf += get_bits_count(&gb) >> 3; buf_size -= get_bits_count(&gb) >> 3; } else { - if (keyframe) - ff_mss12_codec_reset(c); + if (keyframe) { + c->corrupted = 0; + ff_mss12_slicecontext_reset(&ctx->sc[0]); + if (c->slice_split) + ff_mss12_slicecontext_reset(&ctx->sc[1]); + } else if (c->corrupted) return AVERROR_INVALIDDATA; bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING); arith2_init(&acoder, &gB); c->keyframe = keyframe; - if (c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0, + if (c->corrupted = ff_mss12_decode_rect(&ctx->sc[0], &acoder, 0, 0, avctx->width, ctx->split_position)) return AVERROR_INVALIDDATA; @@ -690,7 +694,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size, return AVERROR_INVALIDDATA; bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING); arith2_init(&acoder, &gB); - if (c->corrupted = ff_mss12_decode_rect(&c->sc[1], &acoder, 0, + if (c->corrupted = ff_mss12_decode_rect(&ctx->sc[1], &acoder, 0, ctx->split_position, avctx->width, avctx->height - ctx->split_position)) @@ -830,7 +834,7 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx) int ret; c->avctx = avctx; avctx->coded_frame = &ctx->pic; - if (ret = ff_mss12_decode_init(c, 1)) + if (ret = ff_mss12_decode_init(c, 1, &ctx->sc[0], &ctx->sc[1])) return ret; c->pal_stride = c->mask_stride; c->pal_pic = av_malloc(c->pal_stride * avctx->height); diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 472f252ba9..6c83d8ce58 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -39,11 +39,9 @@ IDCT_ADD_FUNC(8_dc, 10, sse2) IDCT_ADD_FUNC(8, 8, mmx) IDCT_ADD_FUNC(8, 8, sse2) IDCT_ADD_FUNC(8, 10, sse2) -#if HAVE_AVX_EXTERNAL IDCT_ADD_FUNC(, 10, avx) IDCT_ADD_FUNC(8_dc, 10, avx) IDCT_ADD_FUNC(8, 10, avx) -#endif #define 
IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ @@ -64,10 +62,8 @@ IDCT_ADD_REP_FUNC(, 16intra, 8, mmx) IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2) IDCT_ADD_REP_FUNC(, 16intra, 8, sse2) IDCT_ADD_REP_FUNC(, 16intra, 10, sse2) -#if HAVE_AVX_EXTERNAL IDCT_ADD_REP_FUNC(, 16, 10, avx) IDCT_ADD_REP_FUNC(, 16intra, 10, avx) -#endif #define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ @@ -79,9 +75,7 @@ IDCT_ADD_REP_FUNC2(, 8, 8, mmx) IDCT_ADD_REP_FUNC2(, 8, 8, mmx2) IDCT_ADD_REP_FUNC2(, 8, 8, sse2) IDCT_ADD_REP_FUNC2(, 8, 10, sse2) -#if HAVE_AVX_EXTERNAL IDCT_ADD_REP_FUNC2(, 8, 10, avx) -#endif void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul); void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul); diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c index f1ac24e844..282a62e2b7 100644 --- a/libavformat/aviobuf.c +++ b/libavformat/aviobuf.c @@ -796,7 +796,7 @@ int avio_close(AVIOContext *s) avio_flush(s); h = s->opaque; - av_free(s->buffer); + av_freep(&s->buffer); if (!s->write_flag) av_log(s, AV_LOG_DEBUG, "Statistics: %"PRId64" bytes read, %d seeks\n", s->bytes_read, s->seek_count); av_free(s); diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 9e5a89b5c2..d64d300ea7 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -26,8 +26,10 @@ #include "intreadwrite.h" -void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesize[4], - const AVPixFmtDescriptor *desc, int x, int y, int c, int w, +void av_read_image_line(uint16_t *dst, + const uint8_t *data[4], const int linesize[4], + const AVPixFmtDescriptor *desc, + int x, int y, int c, int w, int read_pal_component) { AVComponentDescriptor comp = desc->comp[c]; @@ -53,7 +55,8 @@ void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesiz *dst++ = val; } } else { - const uint8_t *p = data[plane] + y * linesize[plane] + x * step + comp.offset_plus1 - 1; + const uint8_t *p = data[plane] + y * linesize[plane] + + x * 
step + comp.offset_plus1 - 1; int is_8bit = shift + depth <= 8; if (is_8bit) @@ -71,8 +74,10 @@ void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesiz } } -void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesize[4], - const AVPixFmtDescriptor *desc, int x, int y, int c, int w) +void av_write_image_line(const uint16_t *src, + uint8_t *data[4], const int linesize[4], + const AVPixFmtDescriptor *desc, + int x, int y, int c, int w) { AVComponentDescriptor comp = desc->comp[c]; int plane = comp.plane; @@ -93,7 +98,8 @@ void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesi } } else { int shift = comp.shift; - uint8_t *p = data[plane] + y * linesize[plane] + x * step + comp.offset_plus1 - 1; + uint8_t *p = data[plane] + y * linesize[plane] + + x * step + comp.offset_plus1 - 1; if (shift + depth <= 8) { p += !!(flags & PIX_FMT_BE); diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h new file mode 100644 index 0000000000..76abeb90b1 --- /dev/null +++ b/libavutil/x86/cpu.h @@ -0,0 +1,57 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_X86_CPU_H +#define AVUTIL_X86_CPU_H + +#include "config.h" +#include "libavutil/cpu.h" + +#define CPUEXT(flags, suffix, cpuext) \ + (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext)) + +#define AV_CPU_FLAG_AMD3DNOW AV_CPU_FLAG_3DNOW +#define AV_CPU_FLAG_AMD3DNOWEXT AV_CPU_FLAG_3DNOWEXT + +#define EXTERNAL_AMD3DNOW(flags) CPUEXT(flags, _EXTERNAL, AMD3DNOW) +#define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT(flags, _EXTERNAL, AMD3DNOWEXT) +#define EXTERNAL_MMX(flags) CPUEXT(flags, _EXTERNAL, MMX) +#define EXTERNAL_MMXEXT(flags) CPUEXT(flags, _EXTERNAL, MMXEXT) +#define EXTERNAL_SSE(flags) CPUEXT(flags, _EXTERNAL, SSE) +#define EXTERNAL_SSE2(flags) CPUEXT(flags, _EXTERNAL, SSE2) +#define EXTERNAL_SSE3(flags) CPUEXT(flags, _EXTERNAL, SSE3) +#define EXTERNAL_SSSE3(flags) CPUEXT(flags, _EXTERNAL, SSSE3) +#define EXTERNAL_SSE4(flags) CPUEXT(flags, _EXTERNAL, SSE4) +#define EXTERNAL_SSE42(flags) CPUEXT(flags, _EXTERNAL, SSE42) +#define EXTERNAL_AVX(flags) CPUEXT(flags, _EXTERNAL, AVX) +#define EXTERNAL_FMA4(flags) CPUEXT(flags, _EXTERNAL, FMA4) + +#define INLINE_AMD3DNOW(flags) CPUEXT(flags, _INLINE, AMD3DNOW) +#define INLINE_AMD3DNOWEXT(flags) CPUEXT(flags, _INLINE, AMD3DNOWEXT) +#define INLINE_MMX(flags) CPUEXT(flags, _INLINE, MMX) +#define INLINE_MMXEXT(flags) CPUEXT(flags, _INLINE, MMXEXT) +#define INLINE_SSE(flags) CPUEXT(flags, _INLINE, SSE) +#define INLINE_SSE2(flags) CPUEXT(flags, _INLINE, SSE2) +#define INLINE_SSE3(flags) CPUEXT(flags, _INLINE, SSE3) +#define INLINE_SSSE3(flags) CPUEXT(flags, _INLINE, SSSE3) +#define INLINE_SSE4(flags) CPUEXT(flags, _INLINE, SSE4) +#define INLINE_SSE42(flags) CPUEXT(flags, _INLINE, SSE42) +#define INLINE_AVX(flags) CPUEXT(flags, _INLINE, AVX) +#define INLINE_FMA4(flags) CPUEXT(flags, 
_INLINE, FMA4) + +#endif /* AVUTIL_X86_CPU_H */ |