about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2011-12-09 00:05:51 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2011-12-09 00:05:51 +0100
commit 25b9eef410f4a737250dcf2d17b65f6c0c39cd6a (patch)
tree a1a8c88aca0b5a88a6fd0c2e9ac1008698f34fea /libavcodec
parent b229485f1ad38162927b235a359b98ff5a0dc13a (diff)
parent 6b60a4c9c94bbe03afc8e0851197d97d96f644e5 (diff)
download ffmpeg-25b9eef410f4a737250dcf2d17b65f6c0c39cd6a.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  cljr: K&R cosmetics
  cljr: return a more sensible value when encountering invalid headers
  cljr: drop unnecessary emms_c() calls without MMX code
  cljr: remove useless casts
  cljr: group encode/decode parts under single ifdefs
  cljr: remove stray semicolon
  cljr: add missing return statement in decode_end()
  doc: add pulseaudio to the input list
  avconv: remove unsubstantiated comment
  shorten: avoid abort() on unknown audio types
  cljr: add encoder
  build: merge lists of HTML documentation targets
  tests/examples: Mark some variables only used within their files as static.
  tests/tools/examples: Replace direct exit() calls by return.
  x86 cpuid: set vendor union members separately
  cljr: release picture at end of decoding
  rv40: NEON optimised rv40 qpel motion compensation

Conflicts:
  doc/examples/muxing.c
  libavcodec/cljr.c
  libavcodec/version.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/allcodecs.c 2
-rw-r--r-- libavcodec/arm/rv40dsp_init_neon.c 75
-rw-r--r-- libavcodec/arm/rv40dsp_neon.S 639
-rw-r--r-- libavcodec/cljr.c 153
-rw-r--r-- libavcodec/dct-test.c 2
-rw-r--r-- libavcodec/fft-test.c 5
-rw-r--r-- libavcodec/motion-test.c 8
-rw-r--r-- libavcodec/shorten.c 8
-rw-r--r-- libavcodec/version.h 4
9 files changed, 808 insertions, 88 deletions
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 02df6f20ab..19afbd4895 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -89,7 +89,7 @@ void avcodec_register_all(void)
REGISTER_DECODER (CAVS, cavs);
REGISTER_DECODER (CDGRAPHICS, cdgraphics);
REGISTER_DECODER (CINEPAK, cinepak);
- REGISTER_DECODER (CLJR, cljr);
+ REGISTER_ENCDEC (CLJR, cljr);
REGISTER_DECODER (CSCD, cscd);
REGISTER_DECODER (CYUV, cyuv);
REGISTER_DECODER (DFA, dfa);
diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c
index 3a863e1916..36d75e6fd8 100644
--- a/libavcodec/arm/rv40dsp_init_neon.c
+++ b/libavcodec/arm/rv40dsp_init_neon.c
@@ -23,6 +23,28 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/rv34dsp.h"
+#define DECL_QPEL3(type, w, pos) \
+ void ff_##type##_rv40_qpel##w##_mc##pos##_neon(uint8_t *dst, uint8_t *src,\
+ int stride)
+#define DECL_QPEL2(w, pos) \
+ DECL_QPEL3(put, w, pos); \
+ DECL_QPEL3(avg, w, pos)
+
+#define DECL_QPEL_XY(x, y) \
+ DECL_QPEL2(16, x ## y); \
+ DECL_QPEL2(8, x ## y)
+
+#define DECL_QPEL_Y(y) \
+ DECL_QPEL_XY(0, y); \
+ DECL_QPEL_XY(1, y); \
+ DECL_QPEL_XY(2, y); \
+ DECL_QPEL_XY(3, y); \
+
+DECL_QPEL_Y(0);
+DECL_QPEL_Y(1);
+DECL_QPEL_Y(2);
+DECL_QPEL_Y(3);
+
void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
@@ -34,6 +56,59 @@ void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{
+ c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
+ c->put_pixels_tab[0][ 3] = ff_put_rv40_qpel16_mc30_neon;
+ c->put_pixels_tab[0][ 4] = ff_put_rv40_qpel16_mc01_neon;
+ c->put_pixels_tab[0][ 5] = ff_put_rv40_qpel16_mc11_neon;
+ c->put_pixels_tab[0][ 6] = ff_put_rv40_qpel16_mc21_neon;
+ c->put_pixels_tab[0][ 7] = ff_put_rv40_qpel16_mc31_neon;
+ c->put_pixels_tab[0][ 9] = ff_put_rv40_qpel16_mc12_neon;
+ c->put_pixels_tab[0][10] = ff_put_rv40_qpel16_mc22_neon;
+ c->put_pixels_tab[0][11] = ff_put_rv40_qpel16_mc32_neon;
+ c->put_pixels_tab[0][12] = ff_put_rv40_qpel16_mc03_neon;
+ c->put_pixels_tab[0][13] = ff_put_rv40_qpel16_mc13_neon;
+ c->put_pixels_tab[0][14] = ff_put_rv40_qpel16_mc23_neon;
+ c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_neon;
+ c->avg_pixels_tab[0][ 1] = ff_avg_rv40_qpel16_mc10_neon;
+ c->avg_pixels_tab[0][ 3] = ff_avg_rv40_qpel16_mc30_neon;
+ c->avg_pixels_tab[0][ 4] = ff_avg_rv40_qpel16_mc01_neon;
+ c->avg_pixels_tab[0][ 5] = ff_avg_rv40_qpel16_mc11_neon;
+ c->avg_pixels_tab[0][ 6] = ff_avg_rv40_qpel16_mc21_neon;
+ c->avg_pixels_tab[0][ 7] = ff_avg_rv40_qpel16_mc31_neon;
+ c->avg_pixels_tab[0][ 9] = ff_avg_rv40_qpel16_mc12_neon;
+ c->avg_pixels_tab[0][10] = ff_avg_rv40_qpel16_mc22_neon;
+ c->avg_pixels_tab[0][11] = ff_avg_rv40_qpel16_mc32_neon;
+ c->avg_pixels_tab[0][12] = ff_avg_rv40_qpel16_mc03_neon;
+ c->avg_pixels_tab[0][13] = ff_avg_rv40_qpel16_mc13_neon;
+ c->avg_pixels_tab[0][14] = ff_avg_rv40_qpel16_mc23_neon;
+ c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_neon;
+ c->put_pixels_tab[1][ 1] = ff_put_rv40_qpel8_mc10_neon;
+ c->put_pixels_tab[1][ 3] = ff_put_rv40_qpel8_mc30_neon;
+ c->put_pixels_tab[1][ 4] = ff_put_rv40_qpel8_mc01_neon;
+ c->put_pixels_tab[1][ 5] = ff_put_rv40_qpel8_mc11_neon;
+ c->put_pixels_tab[1][ 6] = ff_put_rv40_qpel8_mc21_neon;
+ c->put_pixels_tab[1][ 7] = ff_put_rv40_qpel8_mc31_neon;
+ c->put_pixels_tab[1][ 9] = ff_put_rv40_qpel8_mc12_neon;
+ c->put_pixels_tab[1][10] = ff_put_rv40_qpel8_mc22_neon;
+ c->put_pixels_tab[1][11] = ff_put_rv40_qpel8_mc32_neon;
+ c->put_pixels_tab[1][12] = ff_put_rv40_qpel8_mc03_neon;
+ c->put_pixels_tab[1][13] = ff_put_rv40_qpel8_mc13_neon;
+ c->put_pixels_tab[1][14] = ff_put_rv40_qpel8_mc23_neon;
+ c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_neon;
+ c->avg_pixels_tab[1][ 1] = ff_avg_rv40_qpel8_mc10_neon;
+ c->avg_pixels_tab[1][ 3] = ff_avg_rv40_qpel8_mc30_neon;
+ c->avg_pixels_tab[1][ 4] = ff_avg_rv40_qpel8_mc01_neon;
+ c->avg_pixels_tab[1][ 5] = ff_avg_rv40_qpel8_mc11_neon;
+ c->avg_pixels_tab[1][ 6] = ff_avg_rv40_qpel8_mc21_neon;
+ c->avg_pixels_tab[1][ 7] = ff_avg_rv40_qpel8_mc31_neon;
+ c->avg_pixels_tab[1][ 9] = ff_avg_rv40_qpel8_mc12_neon;
+ c->avg_pixels_tab[1][10] = ff_avg_rv40_qpel8_mc22_neon;
+ c->avg_pixels_tab[1][11] = ff_avg_rv40_qpel8_mc32_neon;
+ c->avg_pixels_tab[1][12] = ff_avg_rv40_qpel8_mc03_neon;
+ c->avg_pixels_tab[1][13] = ff_avg_rv40_qpel8_mc13_neon;
+ c->avg_pixels_tab[1][14] = ff_avg_rv40_qpel8_mc23_neon;
+ c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_neon;
+
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon;
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index cafd98add0..07ba8428c1 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
+ * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
@@ -19,6 +20,644 @@
*/
#include "asm.S"
+#include "neon.S"
+
+.macro qpel_lowpass r0, r1, rc1, rc2, shift
+ vext.8 d25, \r0, \r1, #1 @ src[-1]
+ vext.8 d26, \r0, \r1, #4 @ src[ 2]
+ vext.8 d24, \r0, \r1, #5 @ src[ 3]
+ vaddl.u8 q9, d25, d26
+ vaddl.u8 q8, \r0, d24
+ vext.8 d27, \r0, \r1, #2 @ src[ 0]
+ vshl.s16 q12, q9, #2
+ vsub.s16 q8, q8, q9
+ vext.8 d28, \r0, \r1, #3 @ src[ 1]
+ vsub.s16 q8, q8, q12
+ vmlal.u8 q8, d27, \rc1
+ vmlal.u8 q8, d28, \rc2
+ vqrshrun.s16 \r0, q8, #\shift
+.endm
+
+.macro qpel_lowpass_x2 r0, r1, r2, r3, rc1, rc2, shift
+ vext.8 d25, \r0, \r1, #1 @ src[-1]
+ vext.8 d26, \r0, \r1, #4 @ src[ 2]
+ vext.8 d24, \r0, \r1, #5 @ src[ 3]
+ vaddl.u8 q9, d25, d26
+ vaddl.u8 q8, \r0, d24
+ vext.8 d29, \r0, \r1, #2 @ src[ 0]
+ vext.8 d28, \r0, \r1, #3 @ src[ 1]
+ vshl.s16 q10, q9, #2
+ vext.8 \r1, \r2, \r3, #1 @ src[-1]
+ vsub.s16 q8, q8, q9
+ vext.8 d22, \r2, \r3, #4 @ src[ 2]
+ vext.8 \r0, \r2, \r3, #5 @ src[ 3]
+ vaddl.u8 q13, \r1, d22
+ vaddl.u8 q12, \r2, \r0
+ vsub.s16 q8, q8, q10
+ vshl.s16 q9, q13, #2
+ vsub.s16 q12, q12, q13
+ vmlal.u8 q8, d29, \rc1
+ vmlal.u8 q8, d28, \rc2
+ vsub.s16 q12, q12, q9
+ vext.8 d26, \r2, \r3, #2 @ src[ 0]
+ vext.8 d27, \r2, \r3, #3 @ src[ 1]
+ vmlal.u8 q12, d26, \rc1
+ vmlal.u8 q12, d27, \rc2
+ vqrshrun.s16 \r0, q8, #\shift
+ vqrshrun.s16 \r2, q12, #\shift
+.endm
+
+.macro rv40_qpel8_h shift
+function put_rv40_qpel8_h_lp_packed_s\shift\()_neon
+1:
+ vld1.8 {q2}, [r1], r2
+ vld1.8 {q3}, [r1], r2
+ qpel_lowpass_x2 d4, d5, d6, d7, d0, d1, \shift
+ vst1.8 {d4}, [r12,:64]!
+ vst1.8 {d6}, [r12,:64]!
+ subs r3, r3, #2
+ bgt 1b
+ vld1.8 {q2}, [r1]
+ qpel_lowpass d4, d5, d0, d1, \shift
+ vst1.8 {d4}, [r12,:64]!
+ bx lr
+endfunc
+.endm
+
+.macro rv40_qpel8_v shift, type
+function \type\()_rv40_qpel8_v_lp_packed_s\shift\()_neon
+ vld1.64 {d2}, [r1,:64]!
+ vld1.64 {d3}, [r1,:64]!
+ vld1.64 {d4}, [r1,:64]!
+ vld1.64 {d5}, [r1,:64]!
+ vld1.64 {d6}, [r1,:64]!
+ vld1.64 {d7}, [r1,:64]!
+ vld1.64 {d8}, [r1,:64]!
+ vld1.64 {d9}, [r1,:64]!
+ vld1.64 {d10}, [r1,:64]!
+ vld1.64 {d11}, [r1,:64]!
+ vld1.64 {d12}, [r1,:64]!
+ vld1.64 {d13}, [r1,:64]!
+ vld1.64 {d14}, [r1,:64]!
+ transpose_8x8 d2, d3, d4, d5, d6, d7, d8, d9
+ transpose_8x8 d10, d11, d12, d13, d14, d15, d30, d31
+ qpel_lowpass_x2 d2, d10, d3, d11, d0, d1, \shift
+ qpel_lowpass_x2 d4, d12, d5, d13, d0, d1, \shift
+ qpel_lowpass_x2 d6, d14, d7, d15, d0, d1, \shift
+ qpel_lowpass_x2 d8, d30, d9, d31, d0, d1, \shift
+ transpose_8x8 d2, d3, d4, d5, d6, d7, d8, d9
+ .ifc \type,avg
+ vld1.64 d12, [r0,:64], r2
+ vld1.64 d13, [r0,:64], r2
+ vld1.64 d14, [r0,:64], r2
+ vld1.64 d15, [r0,:64], r2
+ vld1.64 d16, [r0,:64], r2
+ vld1.64 d17, [r0,:64], r2
+ vld1.64 d18, [r0,:64], r2
+ vld1.64 d19, [r0,:64], r2
+ sub r0, r0, r2, lsl #3
+ vrhadd.u8 q1, q1, q6
+ vrhadd.u8 q2, q2, q7
+ vrhadd.u8 q3, q3, q8
+ vrhadd.u8 q4, q4, q9
+ .endif
+ vst1.64 d2, [r0,:64], r2
+ vst1.64 d3, [r0,:64], r2
+ vst1.64 d4, [r0,:64], r2
+ vst1.64 d5, [r0,:64], r2
+ vst1.64 d6, [r0,:64], r2
+ vst1.64 d7, [r0,:64], r2
+ vst1.64 d8, [r0,:64], r2
+ vst1.64 d9, [r0,:64], r2
+ bx lr
+endfunc
+.endm
+
+ rv40_qpel8_h 5
+ rv40_qpel8_h 6
+
+.macro rv40_qpel type
+function \type\()_rv40_qpel8_h_lowpass_neon
+ .ifc \type,avg
+ mov r12, r0
+ .endif
+1:
+ vld1.8 {q2}, [r1], r2
+ vld1.8 {q3}, [r1], r2
+ qpel_lowpass_x2 d4, d5, d6, d7, d0, d1, 6
+ .ifc \type,avg
+ vld1.8 {d3}, [r12,:64], r2
+ vld1.8 {d16}, [r12,:64], r2
+ vrhadd.u8 d4, d4, d3
+ vrhadd.u8 d6, d6, d16
+ .endif
+ vst1.8 {d4}, [r0,:64], r2
+ vst1.8 {d6}, [r0,:64], r2
+ subs r3, r3, #2
+ bgt 1b
+ bx lr
+endfunc
+
+function \type\()_rv40_qpel8_v_lowpass_neon
+ vld1.64 {d2}, [r1], r2
+ vld1.64 {d3}, [r1], r2
+ vld1.64 {d4}, [r1], r2
+ vld1.64 {d5}, [r1], r2
+ vld1.64 {d6}, [r1], r2
+ vld1.64 {d7}, [r1], r2
+ vld1.64 {d8}, [r1], r2
+ vld1.64 {d9}, [r1], r2
+ vld1.64 {d10}, [r1], r2
+ vld1.64 {d11}, [r1], r2
+ vld1.64 {d12}, [r1], r2
+ vld1.64 {d13}, [r1], r2
+ vld1.64 {d14}, [r1]
+ transpose_8x8 d2, d3, d4, d5, d6, d7, d8, d9
+ transpose_8x8 d10, d11, d12, d13, d14, d15, d30, d31
+ qpel_lowpass_x2 d2, d10, d3, d11, d0, d1, 6
+ qpel_lowpass_x2 d4, d12, d5, d13, d0, d1, 6
+ qpel_lowpass_x2 d6, d14, d7, d15, d0, d1, 6
+ qpel_lowpass_x2 d8, d30, d9, d31, d0, d1, 6
+ transpose_8x8 d2, d3, d4, d5, d6, d7, d8, d9
+ .ifc \type,avg
+ vld1.64 d12, [r0,:64], r2
+ vld1.64 d13, [r0,:64], r2
+ vld1.64 d14, [r0,:64], r2
+ vld1.64 d15, [r0,:64], r2
+ vld1.64 d16, [r0,:64], r2
+ vld1.64 d17, [r0,:64], r2
+ vld1.64 d18, [r0,:64], r2
+ vld1.64 d19, [r0,:64], r2
+ sub r0, r0, r2, lsl #3
+ vrhadd.u8 q1, q1, q6
+ vrhadd.u8 q2, q2, q7
+ vrhadd.u8 q3, q3, q8
+ vrhadd.u8 q4, q4, q9
+ .endif
+ vst1.64 d2, [r0,:64], r2
+ vst1.64 d3, [r0,:64], r2
+ vst1.64 d4, [r0,:64], r2
+ vst1.64 d5, [r0,:64], r2
+ vst1.64 d6, [r0,:64], r2
+ vst1.64 d7, [r0,:64], r2
+ vst1.64 d8, [r0,:64], r2
+ vst1.64 d9, [r0,:64], r2
+ bx lr
+endfunc
+
+ rv40_qpel8_v 5, \type
+ rv40_qpel8_v 6, \type
+
+function ff_\type\()_rv40_qpel8_mc10_neon, export=1
+ sub r1, r1, #2
+ mov r3, #8
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ b \type\()_rv40_qpel8_h_lowpass_neon
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc30_neon, export=1
+ sub r1, r1, #2
+ mov r3, #8
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ b \type\()_rv40_qpel8_h_lowpass_neon
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc01_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub r1, r1, r2, lsl #1
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ bl \type\()_rv40_qpel8_v_lowpass_neon
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc11_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc21_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #20
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ vmov.i8 d0, #52
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc31_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ vswp d0, d1
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc12_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ vmov.i8 d0, #20
+ bl \type\()_rv40_qpel8_v_lp_packed_s5_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc22_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #20
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ bl \type\()_rv40_qpel8_v_lp_packed_s5_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc32_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ vmov.i8 d1, #20
+ bl \type\()_rv40_qpel8_v_lp_packed_s5_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc03_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub r1, r1, r2, lsl #1
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ bl \type\()_rv40_qpel8_v_lowpass_neon
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc33_neon, export=1
+ mov r3, #8
+ b ff_\type\()_pixels8_xy2_neon
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc13_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ vswp d0, d1
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel8_mc23_neon, export=1
+ push {r4, lr}
+ vpush {d8-d15}
+ sub sp, sp, #14*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, #12
+ vmov.i8 d0, #20
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ add r1, sp, #7
+ bic r1, r1, #7
+ vmov.i8 d1, #52
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ add sp, sp, #14*8
+ vpop {d8-d15}
+ pop {r4, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc10_neon, export=1
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+.L\type\()_rv40_qpel16_h:
+ push {r1, lr}
+ sub r1, r1, #2
+ mov r3, #16
+ bl \type\()_rv40_qpel8_h_lowpass_neon
+ pop {r1, lr}
+ sub r0, r0, r2, lsl #4
+ add r0, r0, #8
+ add r1, r1, #6
+ mov r3, #16
+ b \type\()_rv40_qpel8_h_lowpass_neon
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc30_neon, export=1
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ b .L\type\()_rv40_qpel16_h
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc01_neon, export=1
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+.L\type\()_rv40_qpel16_v:
+ sub r1, r1, r2, lsl #1
+ push {r1, lr}
+ vpush {d8-d15}
+ bl \type\()_rv40_qpel8_v_lowpass_neon
+ sub r1, r1, r2, lsl #2
+ bl \type\()_rv40_qpel8_v_lowpass_neon
+ ldr r1, [sp, #64]
+ sub r0, r0, r2, lsl #4
+ add r0, r0, #8
+ add r1, r1, #8
+ bl \type\()_rv40_qpel8_v_lowpass_neon
+ sub r1, r1, r2, lsl #2
+ bl \type\()_rv40_qpel8_v_lowpass_neon
+ vpop {d8-d15}
+ pop {r1, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc11_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+.L\type\()_rv40_qpel16_v_s6:
+ add r1, sp, #7
+ bic r1, r1, #7
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ sub r1, r1, #40
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ sub r0, r0, r2, lsl #4
+ add r0, r0, #8
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ sub r1, r1, #40
+ bl \type\()_rv40_qpel8_v_lp_packed_s6_neon
+ add sp, sp, #44*8
+ vpop {d8-d15}
+ pop {r1, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc21_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #20
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ vmov.i8 d0, #52
+ b .L\type\()_rv40_qpel16_v_s6
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc31_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ vswp d0, d1
+ b .L\type\()_rv40_qpel16_v_s6
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc12_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ vmov.i8 d0, #20
+.L\type\()_rv40_qpel16_v_s5:
+ add r1, sp, #7
+ bic r1, r1, #7
+ bl \type\()_rv40_qpel8_v_lp_packed_s5_neon
+ sub r1, r1, #40
+ bl \type\()_rv40_qpel8_v_lp_packed_s5_neon
+ sub r0, r0, r2, lsl #4
+ add r0, r0, #8
+ bl \type\()_rv40_qpel8_v_lp_packed_s5_neon
+ sub r1, r1, #40
+ bl \type\()_rv40_qpel8_v_lp_packed_s5_neon
+ add sp, sp, #44*8
+ vpop {d8-d15}
+ pop {r1, pc}
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc22_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #20
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ b .L\type\()_rv40_qpel16_v_s5
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc32_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ vmov.i8 d1, #20
+ b .L\type\()_rv40_qpel16_v_s5
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc03_neon, export=1
+ vmov.i8 d0, #20
+ vmov.i8 d1, #52
+ b .L\type\()_rv40_qpel16_v
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc13_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #52
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s6_neon
+ vswp d0, d1
+ b .L\type\()_rv40_qpel16_v_s6
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc23_neon, export=1
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ push {r1, lr}
+ vpush {d8-d15}
+ sub sp, sp, #44*8
+ add r12, sp, #7
+ bic r12, r12, #7
+ mov r3, #20
+ vmov.i8 d0, #20
+ vmov.i8 d1, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ ldr r1, [sp, #416]
+ add r1, r1, #8
+ mov r3, #20
+ bl put_rv40_qpel8_h_lp_packed_s5_neon
+ vmov.i8 d1, #52
+ b .L\type\()_rv40_qpel16_v_s6
+endfunc
+
+function ff_\type\()_rv40_qpel16_mc33_neon, export=1
+ mov r3, #16
+ b ff_\type\()_pixels16_xy2_neon
+endfunc
+.endm
+
+ rv40_qpel put
+ rv40_qpel avg
.macro rv40_weight
vmovl.u8 q8, d2
diff --git a/libavcodec/cljr.c b/libavcodec/cljr.c
index 53ab40cbe2..183652b3a1 100644
--- a/libavcodec/cljr.c
+++ b/libavcodec/cljr.c
@@ -25,119 +25,92 @@
*/
#include "avcodec.h"
-#include "dsputil.h"
#include "get_bits.h"
+#include "put_bits.h"
-/* Disable the encoder. */
-#undef CONFIG_CLJR_ENCODER
-#define CONFIG_CLJR_ENCODER 0
-
-typedef struct CLJRContext{
+typedef struct CLJRContext {
AVCodecContext *avctx;
- AVFrame picture;
+ AVFrame picture;
} CLJRContext;
+static av_cold int common_init(AVCodecContext *avctx)
+{
+ CLJRContext * const a = avctx->priv_data;
+
+ avcodec_get_frame_defaults(&a->picture);
+ avctx->coded_frame = &a->picture;
+ a->avctx = avctx;
+
+ return 0;
+}
+
+#if CONFIG_CLJR_DECODER
static int decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
AVPacket *avpkt)
{
const uint8_t *buf = avpkt->data;
- int buf_size = avpkt->size;
+ int buf_size = avpkt->size;
CLJRContext * const a = avctx->priv_data;
GetBitContext gb;
AVFrame *picture = data;
- AVFrame * const p= (AVFrame*)&a->picture;
+ AVFrame * const p = &a->picture;
int x, y;
- if(p->data[0])
+ if (p->data[0])
avctx->release_buffer(avctx, p);
- if(buf_size/avctx->height < avctx->width) {
- av_log(avctx, AV_LOG_ERROR, "Resolution larger than buffer size. Invalid header?\n");
- return -1;
+ if (buf_size / avctx->height < avctx->width) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Resolution larger than buffer size. Invalid header?\n");
+ return AVERROR_INVALIDDATA;
}
- p->reference= 0;
- if(avctx->get_buffer(avctx, p) < 0){
+ p->reference = 0;
+ if (avctx->get_buffer(avctx, p) < 0) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return -1;
}
- p->pict_type= AV_PICTURE_TYPE_I;
- p->key_frame= 1;
+ p->pict_type = AV_PICTURE_TYPE_I;
+ p->key_frame = 1;
init_get_bits(&gb, buf, buf_size * 8);
- for(y=0; y<avctx->height; y++){
- uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ];
- uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ];
- uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ];
- for(x=0; x<avctx->width; x+=4){
+ for (y = 0; y < avctx->height; y++) {
+ uint8_t *luma = &a->picture.data[0][y * a->picture.linesize[0]];
+ uint8_t *cb = &a->picture.data[1][y * a->picture.linesize[1]];
+ uint8_t *cr = &a->picture.data[2][y * a->picture.linesize[2]];
+ for (x = 0; x < avctx->width; x += 4) {
luma[3] = get_bits(&gb, 5) << 3;
luma[2] = get_bits(&gb, 5) << 3;
luma[1] = get_bits(&gb, 5) << 3;
luma[0] = get_bits(&gb, 5) << 3;
- luma+= 4;
+ luma += 4;
*(cb++) = get_bits(&gb, 6) << 2;
*(cr++) = get_bits(&gb, 6) << 2;
}
}
- *picture= *(AVFrame*)&a->picture;
+ *picture = a->picture;
*data_size = sizeof(AVPicture);
- emms_c();
-
return buf_size;
}
-#if CONFIG_CLJR_ENCODER
-static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
- CLJRContext * const a = avctx->priv_data;
- AVFrame *pict = data;
- AVFrame * const p= (AVFrame*)&a->picture;
- int size;
-
- *p = *pict;
- p->pict_type= AV_PICTURE_TYPE_I;
- p->key_frame= 1;
-
- emms_c();
-
- avpriv_align_put_bits(&a->pb);
- while(get_bit_count(&a->pb)&31)
- put_bits(&a->pb, 8, 0);
-
- size= get_bit_count(&a->pb)/32;
-
- return size*4;
-}
-#endif
-
-static av_cold void common_init(AVCodecContext *avctx){
- CLJRContext * const a = avctx->priv_data;
-
- avcodec_get_frame_defaults(&a->picture);
- avctx->coded_frame= (AVFrame*)&a->picture;
- a->avctx= avctx;
-}
-
-static av_cold int decode_init(AVCodecContext *avctx){
-
- common_init(avctx);
-
- avctx->pix_fmt= PIX_FMT_YUV411P;
-
- return 0;
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+ avctx->pix_fmt = PIX_FMT_YUV411P;
+ return common_init(avctx);
}
-#if CONFIG_CLJR_ENCODER
-static av_cold int encode_init(AVCodecContext *avctx){
-
- common_init(avctx);
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+ CLJRContext *a = avctx->priv_data;
+ if (a->picture.data[0])
+ avctx->release_buffer(avctx, &a->picture);
return 0;
}
-#endif
AVCodec ff_cljr_decoder = {
.name = "cljr",
@@ -145,19 +118,55 @@ AVCodec ff_cljr_decoder = {
.id = CODEC_ID_CLJR,
.priv_data_size = sizeof(CLJRContext),
.init = decode_init,
+ .close = decode_end,
.decode = decode_frame,
.capabilities = CODEC_CAP_DR1,
- .long_name = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"),
+ .long_name = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"),
};
+#endif
#if CONFIG_CLJR_ENCODER
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf,
+ int buf_size, void *data)
+{
+ PutBitContext pb;
+ AVFrame *p = data;
+ int x, y;
+
+ p->pict_type = AV_PICTURE_TYPE_I;
+ p->key_frame = 1;
+
+ init_put_bits(&pb, buf, buf_size / 8);
+
+ for (y = 0; y < avctx->height; y++) {
+ uint8_t *luma = &p->data[0][y * p->linesize[0]];
+ uint8_t *cb = &p->data[1][y * p->linesize[1]];
+ uint8_t *cr = &p->data[2][y * p->linesize[2]];
+ for (x = 0; x < avctx->width; x += 4) {
+ put_bits(&pb, 5, luma[3] >> 3);
+ put_bits(&pb, 5, luma[2] >> 3);
+ put_bits(&pb, 5, luma[1] >> 3);
+ put_bits(&pb, 5, luma[0] >> 3);
+ luma += 4;
+ put_bits(&pb, 6, *(cb++) >> 2);
+ put_bits(&pb, 6, *(cr++) >> 2);
+ }
+ }
+
+ flush_put_bits(&pb);
+
+ return put_bits_count(&pb) / 8;
+}
+
AVCodec ff_cljr_encoder = {
.name = "cljr",
.type = AVMEDIA_TYPE_VIDEO,
.id = CODEC_ID_CLJR,
.priv_data_size = sizeof(CLJRContext),
- .init = encode_init,
+ .init = common_init,
.encode = encode_frame,
- .long_name = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"),
+ .pix_fmts = (const enum PixelFormat[]) { PIX_FMT_YUV411P,
+ PIX_FMT_NONE },
+ .long_name = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"),
};
#endif
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index e3203efe89..dd6987b459 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -170,7 +170,7 @@ static const struct algo idct_tab[] = {
#define AANSCALE_BITS 12
-uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
+static uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
static int64_t gettime(void)
{
diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c
index be105fe834..1c88a53fdd 100644
--- a/libavcodec/fft-test.c
+++ b/libavcodec/fft-test.c
@@ -37,8 +37,6 @@
#include <stdlib.h>
#include <string.h>
-#undef exit
-
/* reference fft */
#define MUL16(a,b) ((a) * (b))
@@ -228,7 +226,6 @@ static void help(void)
"-n b set the transform size to 2^b\n"
"-f x set scale factor for output data of (I)MDCT to x\n"
);
- exit(1);
}
enum tf_transform {
@@ -267,7 +264,7 @@ int main(int argc, char **argv)
switch(c) {
case 'h':
help();
- break;
+ return 1;
case 's':
do_speed = 1;
break;
diff --git a/libavcodec/motion-test.c b/libavcodec/motion-test.c
index 85ea1045b4..f187183c12 100644
--- a/libavcodec/motion-test.c
+++ b/libavcodec/motion-test.c
@@ -33,14 +33,13 @@
#include "dsputil.h"
#include "libavutil/lfg.h"
-#undef exit
#undef printf
#define WIDTH 64
#define HEIGHT 64
-uint8_t img1[WIDTH * HEIGHT];
-uint8_t img2[WIDTH * HEIGHT];
+static uint8_t img1[WIDTH * HEIGHT];
+static uint8_t img2[WIDTH * HEIGHT];
static void fill_random(uint8_t *tab, int size)
{
@@ -61,7 +60,6 @@ static void help(void)
{
printf("motion-test [-h]\n"
"test motion implementations\n");
- exit(1);
}
static int64_t gettime(void)
@@ -138,7 +136,7 @@ int main(int argc, char **argv)
switch(c) {
case 'h':
help();
- break;
+ return 1;
}
}
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index f0a173cc7e..e0d3f6f986 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -176,7 +176,7 @@ static void fix_bitshift(ShortenContext *s, int32_t *buffer)
}
-static void init_offset(ShortenContext *s)
+static int init_offset(ShortenContext *s)
{
int32_t mean = 0;
int chan, i;
@@ -190,12 +190,13 @@ static void init_offset(ShortenContext *s)
break;
default:
av_log(s->avctx, AV_LOG_ERROR, "unknown audio type");
- abort();
+ return AVERROR_INVALIDDATA;
}
for (chan = 0; chan < s->channels; chan++)
for (i = 0; i < nblock; i++)
s->offset[chan][i] = mean;
+ return 0;
}
static int decode_wave_header(AVCodecContext *avctx, const uint8_t *header,
@@ -367,7 +368,8 @@ static int read_header(ShortenContext *s)
if ((ret = allocate_buffers(s)) < 0)
return ret;
- init_offset(s);
+ if ((ret = init_offset(s)) < 0)
+ return ret;
if (s->version > 1)
s->lpcqoffset = V2LPCQOFFSET;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 9042e26e2f..0e2c58e852 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,8 +21,8 @@
#define AVCODEC_VERSION_H
#define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR 41
-#define LIBAVCODEC_VERSION_MICRO 2
+#define LIBAVCODEC_VERSION_MINOR 42
+#define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \