aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author James Darnley <jdarnley@obe.tv> 2017-06-12 12:01:21 +0200
committer James Darnley <jdarnley@obe.tv> 2017-06-20 13:35:01 +0200
commit d2597fb0c1c8e4964becf809f03abfa6596a8c14 (patch)
tree 86efb42329d014f1ac72998846e44191433b9ade /libavcodec
parent 8781330d80e3cd1bd3af2a2a2c630ae26779dedd (diff)
download ffmpeg-d2597fb0c1c8e4964becf809f03abfa6596a8c14.tar.gz
avcodec/x86: modify simple_idct10 macros to add an action parameter
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/x86/proresdsp.asm 2
-rw-r--r-- libavcodec/x86/simple_idct10.asm 8
-rw-r--r-- libavcodec/x86/simple_idct10_template.asm 37
3 files changed, 25 insertions, 22 deletions
diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm
index 8318a81c5e..3be0ff7757 100644
--- a/libavcodec/x86/proresdsp.asm
+++ b/libavcodec/x86/proresdsp.asm
@@ -52,7 +52,7 @@ SECTION .text
%macro idct_fn 0
cglobal prores_idct_put_10, 4, 4, 15, pixels, lsize, block, qmat
- IDCT_FN pw_1, 15, pw_88, 18, pw_4, pw_1019, r3
+ IDCT_FN pw_1, 15, pw_88, 18, "put", pw_4, pw_1019, r3
RET
%endmacro
diff --git a/libavcodec/x86/simple_idct10.asm b/libavcodec/x86/simple_idct10.asm
index 7cfd33eaa3..1a5a2eae9b 100644
--- a/libavcodec/x86/simple_idct10.asm
+++ b/libavcodec/x86/simple_idct10.asm
@@ -69,24 +69,24 @@ SECTION .text
%macro idct_fn 0
cglobal simple_idct10, 1, 1, 16, block
- IDCT_FN "", 12, "", 19
+ IDCT_FN "", 12, "", 19, "store"
RET
cglobal simple_idct10_put, 3, 3, 16, pixels, lsize, block
- IDCT_FN "", 12, "", 19, 0, pw_1023
+ IDCT_FN "", 12, "", 19, "put", 0, pw_1023
RET
cglobal simple_idct12, 1, 1, 16, block
; coeffs are already 15bits, adding the offset would cause
; overflow in the input
- IDCT_FN "", 15, pw_2, 16
+ IDCT_FN "", 15, pw_2, 16, "store"
RET
cglobal simple_idct12_put, 3, 3, 16, pixels, lsize, block
; range isn't known, so the C simple_idct range is used
; Also, using a bias on input overflows, so use the bias
; on output of the first butterfly instead
- IDCT_FN "", 15, pw_2, 16, 0, pw_4095
+ IDCT_FN "", 15, pw_2, 16, "put", 0, pw_4095
RET
%endmacro
diff --git a/libavcodec/x86/simple_idct10_template.asm b/libavcodec/x86/simple_idct10_template.asm
index 3f398985a5..8367011dfd 100644
--- a/libavcodec/x86/simple_idct10_template.asm
+++ b/libavcodec/x86/simple_idct10_template.asm
@@ -218,11 +218,12 @@
; %2 = row bias macro
; %3 = column shift
; %4 = column bias macro
-; %5 = min pixel value
-; %6 = max pixel value
-; %7 = qmat (for prores)
+; %5 = final action (nothing, "store", "put", "add")
+; %6 = min pixel value
+; %7 = max pixel value
+; %8 = qmat (for prores)
-%macro IDCT_FN 4-7
+%macro IDCT_FN 4-8
; for (i = 0; i < 8; i++)
; idctRowCondDC(block + i*8);
mova m10,[blockq+ 0] ; { row[0] }[0-7]
@@ -230,13 +231,13 @@
mova m13,[blockq+64] ; { row[4] }[0-7]
mova m12,[blockq+96] ; { row[6] }[0-7]
-%if %0 == 7
- pmullw m10,[%7+ 0]
- pmullw m8, [%7+32]
- pmullw m13,[%7+64]
- pmullw m12,[%7+96]
+%if %0 == 8
+ pmullw m10,[%8+ 0]
+ pmullw m8, [%8+32]
+ pmullw m13,[%8+64]
+ pmullw m12,[%8+96]
- IDCT_1D %1, %2, %7
+ IDCT_1D %1, %2, %8
%else
IDCT_1D %1, %2
%endif
@@ -257,7 +258,8 @@
IDCT_1D %3, %4
; clip/store
-%if %0 == 4
+%if %0 >= 5
+%ifidn %5,"store"
; No clamping, means pure idct
mova [blockq+ 0], m8
mova [blockq+ 16], m0
@@ -267,13 +269,13 @@
mova [blockq+ 80], m11
mova [blockq+ 96], m9
mova [blockq+112], m10
-%else
-%ifidn %5, 0
+%elifidn %5,"put"
+%ifidn %6, 0
pxor m3, m3
%else
- mova m3, [%5]
-%endif
- mova m5, [%6]
+ mova m3, [%6]
+%endif ; ifidn %6, 0
+ mova m5, [%7]
pmaxsw m8, m3
pmaxsw m0, m3
pmaxsw m1, m3
@@ -301,7 +303,8 @@
mova [r0+r1 ], m11
mova [r0+r1*2], m9
mova [r0+r2 ], m10
-%endif
+%endif ; %5 action
+%endif; if %0 >= 5
%endmacro
%endif