Merge remote-tracking branch 'qatar/master'

* qatar/master: ac3enc: use correct alignment and length in channel coupling dsp functions. ffmpeg: don't abuse a global for passing framerate from input to output ffmpeg: don't abuse a global for passing channels from input to output ffmpeg: don't abuse a global for passing samplerate from input to output ARM: update ff_h264_idct8_add4_neon for 4:4:4 changes swscale: use SwsContext for av_log when available swscale: Remove HAVE_MMX from files that are only compiled with MMX enabled. swscale: Fix compilation with --disable-mmx2. Conflicts: ffmpeg.c libswscale/utils.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2011-06-16 03:53:58 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-06-16 03:53:58 +0200
commit: 7a02527b05e2ae5ffab579062dbe3c888758335f (patch)
tree: c52ec9666f9436a6c15df3c6b32d08d897771aba /libavcodec
parent: a0bafaabb0656ca3bb3591beba0de79f6153fdac (diff)
parent: b203f65451646b1555d458a3601159f7d89a3397 (diff)
download: ffmpeg-7a02527b05e2ae5ffab579062dbe3c888758335f.tar.gz
3 files changed, 38 insertions, 30 deletions
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 0547165aaf..f6248a82c9 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -134,36 +134,38 @@ void AC3_NAME(apply_channel_coupling)(AC3EncodeContext *s)
     LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
     int blk, ch, bnd, i, j;
     CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
-    int num_cpl_coefs = s->num_cpl_subbands * 12;
+    int cpl_start, num_cpl_coefs;
 
     memset(cpl_coords,       0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
     memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*fixed_cpl_coords));
 
+    /* align start to 16-byte boundary. align length to multiple of 32.
+        note: coupling start bin % 4 will always be 1 */
+    cpl_start     = s->start_freq[CPL_CH] - 1;
+    num_cpl_coefs = FFALIGN(s->num_cpl_subbands * 12 + 1, 32);
+    cpl_start     = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs;
+
     /* calculate coupling channel from fbw channels */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        CoefType *cpl_coef = &block->mdct_coef[CPL_CH][s->start_freq[CPL_CH]];
+        CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start];
         if (!block->cpl_in_use)
             continue;
-        memset(cpl_coef-1, 0, (num_cpl_coefs+4) * sizeof(*cpl_coef));
+        memset(cpl_coef, 0, num_cpl_coefs * sizeof(*cpl_coef));
         for (ch = 1; ch <= s->fbw_channels; ch++) {
-            CoefType *ch_coef = &block->mdct_coef[ch][s->start_freq[CPL_CH]];
+            CoefType *ch_coef = &block->mdct_coef[ch][cpl_start];
             if (!block->channel_in_cpl[ch])
                 continue;
             for (i = 0; i < num_cpl_coefs; i++)
                 cpl_coef[i] += ch_coef[i];
         }
-        /* note: coupling start bin % 4 will always be 1 and num_cpl_coefs
-                 will always be a multiple of 12, so we need to subtract 1 from
-                 the start and add 4 to the length when using optimized
-                 functions which require 16-byte alignment. */
 
         /* coefficients must be clipped to +/- 1.0 in order to be encoded */
-        s->dsp.vector_clipf(cpl_coef-1, cpl_coef-1, -1.0f, 1.0f, num_cpl_coefs+4);
+        s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs);
 
         /* scale coupling coefficients from float to 24-bit fixed-point */
-        s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][s->start_freq[CPL_CH]-1],
-                                   cpl_coef-1, num_cpl_coefs+4);
+        s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start],
+                                   cpl_coef, num_cpl_coefs);
     }
 
     /* calculate energy in each band in coupling channel and each fbw channel */
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index b344584799..e9146405c2 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -122,8 +122,7 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
     c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
     c->h264_idct_add16      = ff_h264_idct_add16_neon;
     c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
-    //FIXME: reenable when asm is updated.
-    //c->h264_idct_add8       = ff_h264_idct_add8_neon;
+    c->h264_idct_add8       = ff_h264_idct_add8_neon;
     c->h264_idct8_add       = ff_h264_idct8_add_neon;
     c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
     c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index 6b6a669f35..afd3718518 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -148,24 +148,27 @@ function ff_h264_idct_add8_neon, export=1
         add             r5,  r1,  #16*4
         add             r1,  r2,  #16*32
         mov             r2,  r3
+        mov             r3,  r1
         ldr             r6,  [sp, #32]
         movrel          r7,  scan8+16
-        mov             ip,  #7
-1:      ldrb            r8,  [r7], #1
-        ldr             r0,  [r5], #4
+        mov             r12, #0
+1:      ldrb            r8,  [r7, r12]
+        ldr             r0,  [r5, r12, lsl #2]
         ldrb            r8,  [r6, r8]
-        tst             ip,  #4
-        addne           r0,  r0,  r4
-        addeq           r0,  r0,  r9
+        add             r0,  r0,  r4
+        add             r1,  r3,  r12, lsl #5
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         adrne           lr,  ff_h264_idct_add_neon
         adreq           lr,  ff_h264_idct_dc_add_neon
         cmpeq           r8,  #0
         blxne           lr
-        subs            ip,  ip,  #1
-        add             r1,  r1,  #32
-        bge             1b
+        add             r12, r12, #1
+        cmp             r12, #4
+        moveq           r12, #16
+        moveq           r4,  r9
+        cmp             r12, #20
+        blt             1b
         pop             {r4-r10,pc}
 endfunc
 
@@ -374,11 +377,15 @@ function ff_h264_idct8_add4_neon, export=1
 endfunc
 
         .section .rodata
-scan8:  .byte           4+1*8, 5+1*8, 4+2*8, 5+2*8
-        .byte           6+1*8, 7+1*8, 6+2*8, 7+2*8
-        .byte           4+3*8, 5+3*8, 4+4*8, 5+4*8
-        .byte           6+3*8, 7+3*8, 6+4*8, 7+4*8
-        .byte           1+1*8, 2+1*8
-        .byte           1+2*8, 2+2*8
-        .byte           1+4*8, 2+4*8
-        .byte           1+5*8, 2+5*8
+scan8:  .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
+        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
+        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
+        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
+        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
+        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
+        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
+        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
+        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8
+        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
+        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
+        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8
author	Michael Niedermayer <michaelni@gmx.at>	2011-06-16 03:53:58 +0200
committer	Michael Niedermayer <michaelni@gmx.at>	2011-06-16 03:53:58 +0200
commit	7a02527b05e2ae5ffab579062dbe3c888758335f (patch)
tree	c52ec9666f9436a6c15df3c6b32d08d897771aba /libavcodec
parent	a0bafaabb0656ca3bb3591beba0de79f6153fdac (diff)
parent	b203f65451646b1555d458a3601159f7d89a3397 (diff)
download	ffmpeg-7a02527b05e2ae5ffab579062dbe3c888758335f.tar.gz