about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2012-03-07 02:57:53 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2012-03-07 03:22:49 +0100
commit 6df42f98746be06c883ce683563e07c9a2af983f (patch)
tree 6bb893aaf179526515cfb3b1cc933721317dcf6f /libavcodec
parent 57986c501e8c97d4bd2e1b7ce9e9037c4ae06245 (diff)
parent b5161908e06b4497bf663510fb495ba97a6fd2b5 (diff)
download ffmpeg-6df42f98746be06c883ce683563e07c9a2af983f.tar.gz
Merge remote-tracking branch 'qatar/master'

* qatar/master:
  SBR DSP: fix SSE code to not use SSE2 instructions.
  cpu: initialize mask to -1, so that by default, optimizations are used.
  error_resilience: initialize s->block_index[].
  svq3: protect against negative quantizers.
  Don't use ff_cropTbl[] for IDCT.
  swscale: make filterPos 32bit.
  FATE: add CPUFLAGS variable, mapping to -cpuflags avconv option.
  avconv: add -cpuflags option for setting supported cpuflags.
  cpu: add av_set_cpu_flags_mask().
  libx264: Allow overriding the sliced threads option
  avconv: fix counting encoded video size.

Conflicts:
  doc/APIchanges
  doc/fate.texi
  doc/ffmpeg.texi
  ffmpeg.c
  libavcodec/h264idct_template.c
  libavcodec/svq3.c
  libavutil/avutil.h
  libavutil/cpu.c
  libavutil/cpu.h
  libswscale/swscale.c
  tests/Makefile
  tests/fate-run.sh
  tests/regression-funcs.sh

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/dsputil.c 70
-rw-r--r-- libavcodec/error_resilience.c 20
-rw-r--r-- libavcodec/h264idct_template.c 32
-rw-r--r-- libavcodec/libx264.c 3
-rw-r--r-- libavcodec/rv34dsp.c 6
-rw-r--r-- libavcodec/simple_idct.c 10
-rw-r--r-- libavcodec/simple_idct_template.c 34
-rw-r--r-- libavcodec/vc1dsp.c 62
-rw-r--r-- libavcodec/vp3dsp.c 68
-rw-r--r-- libavcodec/vp8dsp.c 18
-rw-r--r-- libavcodec/x86/sbrdsp.asm 2
11 files changed, 157 insertions, 168 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 9821703d37..7379fa2e73 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -367,18 +367,17 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
{
int i;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
for(i=0;i<8;i++) {
- pixels[0] = cm[block[0]];
- pixels[1] = cm[block[1]];
- pixels[2] = cm[block[2]];
- pixels[3] = cm[block[3]];
- pixels[4] = cm[block[4]];
- pixels[5] = cm[block[5]];
- pixels[6] = cm[block[6]];
- pixels[7] = cm[block[7]];
+ pixels[0] = av_clip_uint8(block[0]);
+ pixels[1] = av_clip_uint8(block[1]);
+ pixels[2] = av_clip_uint8(block[2]);
+ pixels[3] = av_clip_uint8(block[3]);
+ pixels[4] = av_clip_uint8(block[4]);
+ pixels[5] = av_clip_uint8(block[5]);
+ pixels[6] = av_clip_uint8(block[6]);
+ pixels[7] = av_clip_uint8(block[7]);
pixels += line_size;
block += 8;
@@ -389,14 +388,13 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
for(i=0;i<4;i++) {
- pixels[0] = cm[block[0]];
- pixels[1] = cm[block[1]];
- pixels[2] = cm[block[2]];
- pixels[3] = cm[block[3]];
+ pixels[0] = av_clip_uint8(block[0]);
+ pixels[1] = av_clip_uint8(block[1]);
+ pixels[2] = av_clip_uint8(block[2]);
+ pixels[3] = av_clip_uint8(block[3]);
pixels += line_size;
block += 8;
@@ -407,12 +405,11 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
for(i=0;i<2;i++) {
- pixels[0] = cm[block[0]];
- pixels[1] = cm[block[1]];
+ pixels[0] = av_clip_uint8(block[0]);
+ pixels[1] = av_clip_uint8(block[1]);
pixels += line_size;
block += 8;
@@ -444,18 +441,17 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
{
int i;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
for(i=0;i<8;i++) {
- pixels[0] = cm[pixels[0] + block[0]];
- pixels[1] = cm[pixels[1] + block[1]];
- pixels[2] = cm[pixels[2] + block[2]];
- pixels[3] = cm[pixels[3] + block[3]];
- pixels[4] = cm[pixels[4] + block[4]];
- pixels[5] = cm[pixels[5] + block[5]];
- pixels[6] = cm[pixels[6] + block[6]];
- pixels[7] = cm[pixels[7] + block[7]];
+ pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+ pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+ pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+ pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+ pixels[4] = av_clip_uint8(pixels[4] + block[4]);
+ pixels[5] = av_clip_uint8(pixels[5] + block[5]);
+ pixels[6] = av_clip_uint8(pixels[6] + block[6]);
+ pixels[7] = av_clip_uint8(pixels[7] + block[7]);
pixels += line_size;
block += 8;
}
@@ -465,14 +461,13 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
for(i=0;i<4;i++) {
- pixels[0] = cm[pixels[0] + block[0]];
- pixels[1] = cm[pixels[1] + block[1]];
- pixels[2] = cm[pixels[2] + block[2]];
- pixels[3] = cm[pixels[3] + block[3]];
+ pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+ pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+ pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+ pixels[3] = av_clip_uint8(pixels[3] + block[3]);
pixels += line_size;
block += 8;
}
@@ -482,12 +477,11 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* read the pixels */
for(i=0;i<2;i++) {
- pixels[0] = cm[pixels[0] + block[0]];
- pixels[1] = cm[pixels[1] + block[1]];
+ pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+ pixels[1] = av_clip_uint8(pixels[1] + block[1]);
pixels += line_size;
block += 8;
}
@@ -2779,15 +2773,11 @@ static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
- dest[0] = cm[(block[0] + 4)>>3];
+ dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
- dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+ dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index 1c1420f93c..35825581af 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -440,9 +440,14 @@ static void guess_mv(MpegEncContext *s)
if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
num_avail <= mb_width / 2) {
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+ s->mb_x = 0;
+ s->mb_y = mb_y;
+ ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
const int mb_xy = mb_x + mb_y * s->mb_stride;
+ ff_update_block_index(s);
+
if (IS_INTRA(s->current_picture.f.mb_type[mb_xy]))
continue;
if (!(s->error_status_table[mb_xy] & ER_MV_ERROR))
@@ -477,6 +482,9 @@ static void guess_mv(MpegEncContext *s)
changed = 0;
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+ s->mb_x = 0;
+ s->mb_y = mb_y;
+ ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
const int mb_xy = mb_x + mb_y * s->mb_stride;
int mv_predictor[8][2] = { { 0 } };
@@ -488,6 +496,8 @@ static void guess_mv(MpegEncContext *s)
const int mot_index = (mb_x + mb_y * mot_stride) * mot_step;
int prev_x, prev_y, prev_ref;
+ ff_update_block_index(s);
+
if ((mb_x ^ mb_y ^ pass) & 1)
continue;
@@ -1098,11 +1108,16 @@ void ff_er_frame_end(MpegEncContext *s)
/* handle inter blocks with damaged AC */
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+ s->mb_x = 0;
+ s->mb_y = mb_y;
+ ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
const int mb_xy = mb_x + mb_y * s->mb_stride;
const int mb_type = s->current_picture.f.mb_type[mb_xy];
int dir = !s->last_picture.f.data[0];
+ ff_update_block_index(s);
+
error = s->error_status_table[mb_xy];
if (IS_INTRA(mb_type))
@@ -1140,11 +1155,16 @@ void ff_er_frame_end(MpegEncContext *s)
/* guess MVs */
if (s->pict_type == AV_PICTURE_TYPE_B) {
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+ s->mb_x = 0;
+ s->mb_y = mb_y;
+ ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
int xy = mb_x * 2 + mb_y * 2 * s->b8_stride;
const int mb_xy = mb_x + mb_y * s->mb_stride;
const int mb_type = s->current_picture.f.mb_type[mb_xy];
+ ff_update_block_index(s);
+
error = s->error_status_table[mb_xy];
if (IS_INTRA(mb_type))
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index c59976a1d9..5c730fdb26 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -49,7 +49,6 @@ static const uint8_t scan8[16*3]={
void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
{
int i;
- INIT_CLIP
pixel *dst = (pixel*)_dst;
dctcoef *block = (dctcoef*)_block;
stride >>= sizeof(pixel)-1;
@@ -74,16 +73,15 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i];
const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1);
- dst[i + 0*stride]= CLIP(dst[i + 0*stride] + ((z0 + z3) >> 6));
- dst[i + 1*stride]= CLIP(dst[i + 1*stride] + ((z1 + z2) >> 6));
- dst[i + 2*stride]= CLIP(dst[i + 2*stride] + ((z1 - z2) >> 6));
- dst[i + 3*stride]= CLIP(dst[i + 3*stride] + ((z0 - z3) >> 6));
+ dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((z0 + z3) >> 6));
+ dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((z1 + z2) >> 6));
+ dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((z1 - z2) >> 6));
+ dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((z0 - z3) >> 6));
}
}
void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
int i;
- INIT_CLIP
pixel *dst = (pixel*)_dst;
dctcoef *block = (dctcoef*)_block;
stride >>= sizeof(pixel)-1;
@@ -143,14 +141,14 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
const int b5 = (a3>>2) - a5;
const int b7 = a7 - (a1>>2);
- dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) );
- dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) );
- dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) );
- dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) );
- dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) );
- dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) );
- dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) );
- dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) );
+ dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((b0 + b7) >> 6) );
+ dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((b2 + b5) >> 6) );
+ dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((b4 + b3) >> 6) );
+ dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((b6 + b1) >> 6) );
+ dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((b6 - b1) >> 6) );
+ dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((b4 - b3) >> 6) );
+ dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((b2 - b5) >> 6) );
+ dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((b0 - b7) >> 6) );
}
}
@@ -158,13 +156,12 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
int i, j;
int dc = (((dctcoef*)block)[0] + 32) >> 6;
- INIT_CLIP
pixel *dst = (pixel*)p_dst;
stride >>= sizeof(pixel)-1;
for( j = 0; j < 4; j++ )
{
for( i = 0; i < 4; i++ )
- dst[i] = CLIP( dst[i] + dc );
+ dst[i] = av_clip_pixel( dst[i] + dc );
dst += stride;
}
}
@@ -172,13 +169,12 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
int i, j;
int dc = (((dctcoef*)block)[0] + 32) >> 6;
- INIT_CLIP
pixel *dst = (pixel*)p_dst;
stride >>= sizeof(pixel)-1;
for( j = 0; j < 8; j++ )
{
for( i = 0; i < 8; i++ )
- dst[i] = CLIP( dst[i] + dc );
+ dst[i] = av_clip_pixel( dst[i] + dc );
dst += stride;
}
}
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 5610038d16..3d503dbfc7 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -454,6 +454,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
x4->params.i_threads = avctx->thread_count;
+ if (avctx->thread_type)
+ x4->params.b_sliced_threads = avctx->thread_type == FF_THREAD_SLICE;
x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
@@ -631,6 +633,7 @@ static const AVCodecDefault x264_defaults[] = {
{ "coder", "-1" },
{ "cmp", "-1" },
{ "threads", AV_STRINGIFY(X264_THREADS_AUTO) },
+ { "thread_type", "0" },
{ NULL },
};
diff --git a/libavcodec/rv34dsp.c b/libavcodec/rv34dsp.c
index 434799921b..4145c4dd85 100644
--- a/libavcodec/rv34dsp.c
+++ b/libavcodec/rv34dsp.c
@@ -102,15 +102,13 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc)
{
- const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int i, j;
- cm += (13*13*dc + 0x200) >> 10;
-
+ dc = (13*13*dc + 0x200) >> 10;
for (i = 0; i < 4; i++)
{
for (j = 0; j < 4; j++)
- dst[j] = cm[ dst[j] ];
+ dst[j] = av_clip_uint8( dst[j] + dc );
dst += stride;
}
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index 1bf2d56ba4..293185074b 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -132,7 +132,6 @@ void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
{
int c0, c1, c2, c3, a0, a1, a2, a3;
- const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
a0 = col[8*0];
a1 = col[8*1];
@@ -142,13 +141,13 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col
c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
c1 = a1 * C1 + a3 * C2;
c3 = a1 * C2 - a3 * C1;
- dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
+ dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT));
dest += line_size;
- dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
+ dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT));
dest += line_size;
- dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
+ dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT));
dest += line_size;
- dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
+ dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT));
}
#define RN_SHIFT 15
@@ -160,7 +159,6 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col
static inline void idct4row(DCTELEM *row)
{
int c0, c1, c2, c3, a0, a1, a2, a3;
- //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
a0 = row[0];
a1 = row[1];
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 4bb51363f9..b67893c2b2 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -224,50 +224,48 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
DCTELEM *col)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
- INIT_CLIP;
IDCT_COLS;
- dest[0] = CLIP((a0 + b0) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
dest += line_size;
- dest[0] = CLIP((a1 + b1) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
dest += line_size;
- dest[0] = CLIP((a2 + b2) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
dest += line_size;
- dest[0] = CLIP((a3 + b3) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
dest += line_size;
- dest[0] = CLIP((a3 - b3) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
dest += line_size;
- dest[0] = CLIP((a2 - b2) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
dest += line_size;
- dest[0] = CLIP((a1 - b1) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
dest += line_size;
- dest[0] = CLIP((a0 - b0) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
}
static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
DCTELEM *col)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
- INIT_CLIP;
IDCT_COLS;
- dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
dest += line_size;
- dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
dest += line_size;
- dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
dest += line_size;
- dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
dest += line_size;
- dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
dest += line_size;
- dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
dest += line_size;
- dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
dest += line_size;
- dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT));
+ dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
}
static inline void FUNC(idctSparseCol)(DCTELEM *col)
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 91578a74b8..da7593feca 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -139,8 +139,6 @@ static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right)
* @see 8.6
*/
static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
int a0_sign = a0 >> 31; /* Store sign */
a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
@@ -163,8 +161,8 @@ static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
else{
d = FFMIN(d, clip);
d = (d ^ d_sign) - d_sign; /* Restore sign */
- src[-1*stride] = cm[src[-1*stride] - d];
- src[ 0*stride] = cm[src[ 0*stride] + d];
+ src[-1*stride] = av_clip_uint8(src[-1*stride] - d);
+ src[ 0*stride] = av_clip_uint8(src[ 0*stride] + d);
}
return 1;
}
@@ -234,19 +232,17 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
{
int i;
int dc = block[0];
- const uint8_t *cm;
dc = (3 * dc + 1) >> 1;
dc = (3 * dc + 16) >> 5;
- cm = ff_cropTbl + MAX_NEG_CROP + dc;
for(i = 0; i < 8; i++){
- dest[0] = cm[dest[0]];
- dest[1] = cm[dest[1]];
- dest[2] = cm[dest[2]];
- dest[3] = cm[dest[3]];
- dest[4] = cm[dest[4]];
- dest[5] = cm[dest[5]];
- dest[6] = cm[dest[6]];
- dest[7] = cm[dest[7]];
+ dest[0] = av_clip_uint8(dest[0] + dc);
+ dest[1] = av_clip_uint8(dest[1] + dc);
+ dest[2] = av_clip_uint8(dest[2] + dc);
+ dest[3] = av_clip_uint8(dest[3] + dc);
+ dest[4] = av_clip_uint8(dest[4] + dc);
+ dest[5] = av_clip_uint8(dest[5] + dc);
+ dest[6] = av_clip_uint8(dest[6] + dc);
+ dest[7] = av_clip_uint8(dest[7] + dc);
dest += linesize;
}
}
@@ -326,19 +322,17 @@ static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
{
int i;
int dc = block[0];
- const uint8_t *cm;
dc = ( 3 * dc + 1) >> 1;
dc = (17 * dc + 64) >> 7;
- cm = ff_cropTbl + MAX_NEG_CROP + dc;
for(i = 0; i < 4; i++){
- dest[0] = cm[dest[0]];
- dest[1] = cm[dest[1]];
- dest[2] = cm[dest[2]];
- dest[3] = cm[dest[3]];
- dest[4] = cm[dest[4]];
- dest[5] = cm[dest[5]];
- dest[6] = cm[dest[6]];
- dest[7] = cm[dest[7]];
+ dest[0] = av_clip_uint8(dest[0] + dc);
+ dest[1] = av_clip_uint8(dest[1] + dc);
+ dest[2] = av_clip_uint8(dest[2] + dc);
+ dest[3] = av_clip_uint8(dest[3] + dc);
+ dest[4] = av_clip_uint8(dest[4] + dc);
+ dest[5] = av_clip_uint8(dest[5] + dc);
+ dest[6] = av_clip_uint8(dest[6] + dc);
+ dest[7] = av_clip_uint8(dest[7] + dc);
dest += linesize;
}
}
@@ -403,15 +397,13 @@ static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
{
int i;
int dc = block[0];
- const uint8_t *cm;
dc = (17 * dc + 4) >> 3;
dc = (12 * dc + 64) >> 7;
- cm = ff_cropTbl + MAX_NEG_CROP + dc;
for(i = 0; i < 8; i++){
- dest[0] = cm[dest[0]];
- dest[1] = cm[dest[1]];
- dest[2] = cm[dest[2]];
- dest[3] = cm[dest[3]];
+ dest[0] = av_clip_uint8(dest[0] + dc);
+ dest[1] = av_clip_uint8(dest[1] + dc);
+ dest[2] = av_clip_uint8(dest[2] + dc);
+ dest[3] = av_clip_uint8(dest[3] + dc);
dest += linesize;
}
}
@@ -476,15 +468,13 @@ static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
{
int i;
int dc = block[0];
- const uint8_t *cm;
dc = (17 * dc + 4) >> 3;
dc = (17 * dc + 64) >> 7;
- cm = ff_cropTbl + MAX_NEG_CROP + dc;
for(i = 0; i < 4; i++){
- dest[0] = cm[dest[0]];
- dest[1] = cm[dest[1]];
- dest[2] = cm[dest[2]];
- dest[3] = cm[dest[3]];
+ dest[0] = av_clip_uint8(dest[0] + dc);
+ dest[1] = av_clip_uint8(dest[1] + dc);
+ dest[2] = av_clip_uint8(dest[2] + dc);
+ dest[3] = av_clip_uint8(dest[3] + dc);
dest += linesize;
}
}
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index 94efa3b1d2..9fded0f53e 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -41,7 +41,6 @@
static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
{
int16_t *ip = input;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
int Ed, Gd, Add, Bdd, Fd, Hd;
@@ -147,29 +146,29 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
ip[5*8] = (Fd + Bdd ) >> 4;
ip[6*8] = (Fd - Bdd ) >> 4;
}else if(type==1){
- dst[0*stride] = cm[(Gd + Cd ) >> 4];
- dst[7*stride] = cm[(Gd - Cd ) >> 4];
+ dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4);
+ dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4);
- dst[1*stride] = cm[(Add + Hd ) >> 4];
- dst[2*stride] = cm[(Add - Hd ) >> 4];
+ dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
+ dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
- dst[3*stride] = cm[(Ed + Dd ) >> 4];
- dst[4*stride] = cm[(Ed - Dd ) >> 4];
+ dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4);
+ dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4);
- dst[5*stride] = cm[(Fd + Bdd ) >> 4];
- dst[6*stride] = cm[(Fd - Bdd ) >> 4];
+ dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
+ dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
}else{
- dst[0*stride] = cm[dst[0*stride] + ((Gd + Cd ) >> 4)];
- dst[7*stride] = cm[dst[7*stride] + ((Gd - Cd ) >> 4)];
+ dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4));
+ dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4));
- dst[1*stride] = cm[dst[1*stride] + ((Add + Hd ) >> 4)];
- dst[2*stride] = cm[dst[2*stride] + ((Add - Hd ) >> 4)];
+ dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
+ dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
- dst[3*stride] = cm[dst[3*stride] + ((Ed + Dd ) >> 4)];
- dst[4*stride] = cm[dst[4*stride] + ((Ed - Dd ) >> 4)];
+ dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4));
+ dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4));
- dst[5*stride] = cm[dst[5*stride] + ((Fd + Bdd ) >> 4)];
- dst[6*stride] = cm[dst[6*stride] + ((Fd - Bdd ) >> 4)];
+ dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
+ dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
}
} else {
@@ -190,18 +189,18 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
dst[4*stride]=
dst[5*stride]=
dst[6*stride]=
- dst[7*stride]= cm[128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)];
+ dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20));
}else{
if(ip[0*8]){
int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
- dst[0*stride] = cm[dst[0*stride] + v];
- dst[1*stride] = cm[dst[1*stride] + v];
- dst[2*stride] = cm[dst[2*stride] + v];
- dst[3*stride] = cm[dst[3*stride] + v];
- dst[4*stride] = cm[dst[4*stride] + v];
- dst[5*stride] = cm[dst[5*stride] + v];
- dst[6*stride] = cm[dst[6*stride] + v];
- dst[7*stride] = cm[dst[7*stride] + v];
+ dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
+ dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
+ dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
+ dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
+ dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
+ dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
+ dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
+ dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
}
}
}
@@ -225,17 +224,16 @@ void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*
void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){
int i, dc = (block[0] + 15) >> 5;
- const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
for(i = 0; i < 8; i++){
- dest[0] = cm[dest[0]];
- dest[1] = cm[dest[1]];
- dest[2] = cm[dest[2]];
- dest[3] = cm[dest[3]];
- dest[4] = cm[dest[4]];
- dest[5] = cm[dest[5]];
- dest[6] = cm[dest[6]];
- dest[7] = cm[dest[7]];
+ dest[0] = av_clip_uint8(dest[0] + dc);
+ dest[1] = av_clip_uint8(dest[1] + dc);
+ dest[2] = av_clip_uint8(dest[2] + dc);
+ dest[3] = av_clip_uint8(dest[3] + dc);
+ dest[4] = av_clip_uint8(dest[4] + dc);
+ dest[5] = av_clip_uint8(dest[5] + dc);
+ dest[6] = av_clip_uint8(dest[6] + dc);
+ dest[7] = av_clip_uint8(dest[7] + dc);
dest += line_size;
}
}
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index 12f6988bff..1ee070c78b 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -80,7 +80,6 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
{
int i, t0, t1, t2, t3;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
DCTELEM tmp[16];
for (i = 0; i < 4; i++) {
@@ -105,10 +104,10 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
- dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)];
- dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)];
- dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)];
- dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)];
+ dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
+ dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
+ dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
+ dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
dst += stride;
}
}
@@ -116,14 +115,13 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
{
int i, dc = (block[0] + 4) >> 3;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
block[0] = 0;
for (i = 0; i < 4; i++) {
- dst[0] = cm[dst[0]];
- dst[1] = cm[dst[1]];
- dst[2] = cm[dst[2]];
- dst[3] = cm[dst[3]];
+ dst[0] = av_clip_uint8(dst[0] + dc);
+ dst[1] = av_clip_uint8(dst[1] + dc);
+ dst[2] = av_clip_uint8(dst[2] + dc);
+ dst[3] = av_clip_uint8(dst[3] + dc);
dst += stride;
}
}
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c165c52ca4..c3b559bb15 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -104,7 +104,7 @@ cglobal sbr_hf_g_filt, 5, 6, 5
movq m2, [r1]
punpckldq m0, m0
mulps m2, m0
- movq [r0], m2
+ movlps [r0], m2
add r0, 8
add r2, 4
add r1, STEP