Update 8-bit H.264 IDCT function names to reflect bit-depth.

Signed-off-by: Ronald S. Bultje <rbultje@google.com>
author: Daniel Kang <daniel.d.kang@gmail.com> 2011-05-24 15:15:08 -0400
committer: Ronald S. Bultje <rbultje@google.com> 2011-05-31 15:02:32 -0700
commit: 348493db60de19d1997fd2861e130720218b9fcf (patch)
tree: dc9c8b9489d6a452bf63a07ed77bcf57d99269ca
parent: 836f47d34b49e8ba9883e738a42f154130421caa (diff)
download: ffmpeg-348493db60de19d1997fd2861e130720218b9fcf.tar.gz
3 files changed, 57 insertions, 72 deletions
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 87a1dd9722..864c118bb5 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -66,7 +66,6 @@ typedef struct H264DSPContext{
     void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
     void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
 
-    void (*h264_dct)(DCTELEM block[4][4]);
     void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
     void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
     void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index ae70a3049b..f90f41c4bc 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -73,7 +73,7 @@ SECTION .text
 
 INIT_MMX
 ; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct_add_mmx, 3, 3, 0
+cglobal h264_idct_add_8_mmx, 3, 3, 0
     IDCT4_ADD    r0, r1, r2
     RET
 
@@ -125,7 +125,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
     SUMSUB_BA    w, 0, 4
     SUMSUB_BA    w, 3, 2
     SUMSUB_BA    w, 1, 5
-    SWAP          7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
+    SWAP         7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
 %endmacro
 
 %macro IDCT8_1D_FULL 1
@@ -177,7 +177,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
 
 INIT_MMX
 ; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct8_add_mmx, 3, 4, 0
+cglobal h264_idct8_add_8_mmx, 3, 4, 0
     %assign pad 128+4-(stack_offset&7)
     SUB         rsp, pad
 
@@ -237,7 +237,7 @@ cglobal h264_idct8_add_mmx, 3, 4, 0
 
 INIT_XMM
 ; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct8_add_sse2, 3, 4, 10
+cglobal h264_idct8_add_8_sse2, 3, 4, 10
     IDCT8_ADD_SSE r0, r1, r2, r3
     RET
 
@@ -261,7 +261,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
     packuswb     m1, m1
 %endmacro
 
-%macro DC_ADD_MMX2_OP 3-4
+%macro DC_ADD_MMX2_OP 4
     %1           m2, [%2     ]
     %1           m3, [%2+%3  ]
     %1           m4, [%2+%3*2]
@@ -282,13 +282,13 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
 
 INIT_MMX
 ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct_dc_add_mmx2, 3, 3, 0
+cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0
     DC_ADD_MMX2_INIT r1, r2
     DC_ADD_MMX2_OP movh, r0, r2, r1
     RET
 
 ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
+cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
     DC_ADD_MMX2_INIT r1, r2
     DC_ADD_MMX2_OP mova, r0, r2, r1
     lea          r0, [r0+r2*4]
@@ -297,7 +297,7 @@ cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
 
 ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
 ;             DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16_mmx, 5, 7, 0
+cglobal h264_idct_add16_8_mmx, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -319,7 +319,7 @@ cglobal h264_idct_add16_mmx, 5, 7, 0
 
 ; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
 ;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct8_add4_mmx, 5, 7, 0
+cglobal h264_idct8_add4_8_mmx, 5, 7, 0
     %assign pad 128+4-(stack_offset&7)
     SUB         rsp, pad
 
@@ -351,7 +351,7 @@ cglobal h264_idct8_add4_mmx, 5, 7, 0
 
 ; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16_mmx2, 5, 7, 0
+cglobal h264_idct_add16_8_mmx2, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -398,7 +398,7 @@ cglobal h264_idct_add16_mmx2, 5, 7, 0
 
 ; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
 ;                             DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_mmx, 5, 7, 0
+cglobal h264_idct_add16intra_8_mmx, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -421,7 +421,7 @@ cglobal h264_idct_add16intra_mmx, 5, 7, 0
 
 ; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
 ;                              DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_mmx2, 5, 7, 0
+cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -466,7 +466,7 @@ cglobal h264_idct_add16intra_mmx2, 5, 7, 0
 
 ; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct8_add4_mmx2, 5, 7, 0
+cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
     %assign pad 128+4-(stack_offset&7)
     SUB         rsp, pad
 
@@ -529,7 +529,7 @@ cglobal h264_idct8_add4_mmx2, 5, 7, 0
 INIT_XMM
 ; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct8_add4_sse2, 5, 7, 10
+cglobal h264_idct8_add4_8_sse2, 5, 7, 10
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -607,7 +607,7 @@ h264_idct_add8_mmx_plane:
 
 ; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
 ;                       DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add8_mmx, 5, 7, 0
+cglobal h264_idct_add8_8_mmx, 5, 7, 0
     mov          r5, 16
     add          r2, 512
 %ifdef PIC
@@ -668,7 +668,7 @@ h264_idct_add8_mmx2_plane
 
 ; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
 ;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add8_mmx2, 5, 7, 0
+cglobal h264_idct_add8_8_mmx2, 5, 7, 0
     mov          r5, 16
     add          r2, 512
 %ifdef ARCH_X86_64
@@ -744,7 +744,7 @@ x264_add8x4_idct_sse2:
 
 ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16_sse2, 5, 5, 8
+cglobal h264_idct_add16_8_sse2, 5, 5, 8
 %ifdef ARCH_X86_64
     mov        r10, r0
 %endif
@@ -791,7 +791,7 @@ cglobal h264_idct_add16_sse2, 5, 5, 8
 
 ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
 ;                              DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_sse2, 5, 7, 8
+cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
 %ifdef ARCH_X86_64
     mov        r10, r0
 %endif
@@ -840,7 +840,7 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8
 
 ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
 ;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add8_sse2, 5, 7, 8
+cglobal h264_idct_add8_8_sse2, 5, 7, 8
     add          r2, 512
 %ifdef ARCH_X86_64
     mov         r10, r0
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index d60fbd5e79..1a31e41a43 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -30,9 +30,14 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
 #define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
 void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);
 
+IDCT_ADD_FUNC(, 8, mmx)
 IDCT_ADD_FUNC(, 10, sse2)
+IDCT_ADD_FUNC(_dc, 8, mmx2)
 IDCT_ADD_FUNC(_dc, 10, mmx2)
+IDCT_ADD_FUNC(8_dc, 8, mmx2)
 IDCT_ADD_FUNC(8_dc, 10, sse2)
+IDCT_ADD_FUNC(8, 8, mmx)
+IDCT_ADD_FUNC(8, 8, sse2)
 IDCT_ADD_FUNC(8, 10, sse2)
 #if HAVE_AVX
 IDCT_ADD_FUNC(, 10, avx)
@@ -46,9 +51,18 @@ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                               (uint8_t *dst, const int *block_offset, \
                               DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
+IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
 IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
 IDCT_ADD_REP_FUNC(8, 4, 10, avx)
+IDCT_ADD_REP_FUNC(, 16, 8, mmx)
+IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
+IDCT_ADD_REP_FUNC(, 16, 8, sse2)
 IDCT_ADD_REP_FUNC(, 16, 10, sse2)
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
+IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
 IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
 #if HAVE_AVX
 IDCT_ADD_REP_FUNC(, 16, 10, avx)
@@ -60,42 +74,14 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                               (uint8_t **dst, const int *block_offset, \
                               DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
+IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
 #if HAVE_AVX
 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
 #endif
 
-void ff_h264_idct_add_mmx     (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct8_add_mmx    (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct8_add_sse2   (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct_dc_add_mmx2 (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride);
-
-void ff_h264_idct_add16_mmx      (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_mmx      (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16_mmx2     (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_mmx (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_mmx2     (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_sse2     (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_mmx       (uint8_t **dest, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_mmx2      (uint8_t **dest, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-
-void ff_h264_idct_add16_sse2     (uint8_t *dst, const int *block_offset, DCTELEM *block,
-                                  int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block,
-                                  int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_sse2      (uint8_t **dest, const int *block_offset, DCTELEM *block,
-                                  int stride, const uint8_t nnzc[6*8]);
 void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul);
 void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
 
@@ -350,24 +336,24 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
     }
 #if HAVE_YASM
     if (mm_flags & AV_CPU_FLAG_MMX) {
-        c->h264_idct_dc_add=
-        c->h264_idct_add= ff_h264_idct_add_mmx;
-        c->h264_idct8_dc_add=
-        c->h264_idct8_add= ff_h264_idct8_add_mmx;
-
-        c->h264_idct_add16     = ff_h264_idct_add16_mmx;
-        c->h264_idct8_add4     = ff_h264_idct8_add4_mmx;
-        c->h264_idct_add8      = ff_h264_idct_add8_mmx;
-        c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
+        c->h264_idct_dc_add         =
+        c->h264_idct_add            = ff_h264_idct_add_8_mmx;
+        c->h264_idct8_dc_add        =
+        c->h264_idct8_add           = ff_h264_idct8_add_8_mmx;
+
+        c->h264_idct_add16          = ff_h264_idct_add16_8_mmx;
+        c->h264_idct8_add4          = ff_h264_idct8_add4_8_mmx;
+        c->h264_idct_add8           = ff_h264_idct_add8_8_mmx;
+        c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_mmx;
         c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
 
         if (mm_flags & AV_CPU_FLAG_MMX2) {
-            c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
-            c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
-            c->h264_idct_add16     = ff_h264_idct_add16_mmx2;
-            c->h264_idct8_add4     = ff_h264_idct8_add4_mmx2;
-            c->h264_idct_add8      = ff_h264_idct_add8_mmx2;
-            c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
+            c->h264_idct_dc_add    = ff_h264_idct_dc_add_8_mmx2;
+            c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_8_mmx2;
+            c->h264_idct_add16     = ff_h264_idct_add16_8_mmx2;
+            c->h264_idct8_add4     = ff_h264_idct8_add4_8_mmx2;
+            c->h264_idct_add8      = ff_h264_idct_add8_8_mmx2;
+            c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
 
             c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
             c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
@@ -398,8 +384,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
             c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
 
             if (mm_flags&AV_CPU_FLAG_SSE2) {
-                c->h264_idct8_add = ff_h264_idct8_add_sse2;
-                c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
+                c->h264_idct8_add           = ff_h264_idct8_add_8_sse2;
+
+                c->h264_idct_add16          = ff_h264_idct_add16_8_sse2;
+                c->h264_idct8_add4          = ff_h264_idct8_add4_8_sse2;
+                c->h264_idct_add8           = ff_h264_idct_add8_8_sse2;
+                c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_sse2;
                 c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
 
                 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
@@ -420,10 +410,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
 #endif
-
-                c->h264_idct_add16 = ff_h264_idct_add16_sse2;
-                c->h264_idct_add8  = ff_h264_idct_add8_sse2;
-                c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
             }
             if (mm_flags&AV_CPU_FLAG_SSSE3) {
                 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
author	Daniel Kang <daniel.d.kang@gmail.com>	2011-05-24 15:15:08 -0400
committer	Ronald S. Bultje <rbultje@google.com>	2011-05-31 15:02:32 -0700
commit	348493db60de19d1997fd2861e130720218b9fcf (patch)
tree	dc9c8b9489d6a452bf63a07ed77bcf57d99269ca
parent	836f47d34b49e8ba9883e738a42f154130421caa (diff)
download	ffmpeg-348493db60de19d1997fd2861e130720218b9fcf.tar.gz