diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-10-21 00:00:39 -0700 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-10-21 01:00:45 -0700 |
commit | c2d337429c7c87ee559efe54dbc0f84f2a25c3a4 (patch) | |
tree | 2aae10d3e5c36d3c3c45b9a8970999cc5c1429f6 /libavcodec/arm | |
parent | 229d263cc914b5396847f7249fdda2e6ded9ec1b (diff) | |
download | ffmpeg-c2d337429c7c87ee559efe54dbc0f84f2a25c3a4.tar.gz |
H264: change weight/biweight functions to take a height argument.
Neon parts by Mans Rullgard <mans@mansr.com>.
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/h264dsp_init_arm.c | 77 | ||||
-rw-r--r-- | libavcodec/arm/h264dsp_neon.S | 86 |
2 files changed, 46 insertions, 117 deletions
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c index c1ca217add..1c331a495d 100644 --- a/libavcodec/arm/h264dsp_init_arm.c +++ b/libavcodec/arm/h264dsp_init_arm.c @@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); -void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); +void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, + int log2_den, int weight, int offset); +void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height, + int log2_den, int weight, int offset); +void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height, + int log2_den, int weight, int offset); -void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); +void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride, + int height, int log2_den, int weightd, + int weights, int offset); +void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride, + int height, int log2_den, int weightd, + int weights, int offset); +void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride, + int height, int log2_den, int weightd, + int weights, int offset); void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); @@ -100,23 +75,13 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; - c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon; - c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon; - c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon; - c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon; - c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon; - c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon; - c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon; - c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon; + c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; + c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; + c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon; - c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon; - c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon; - c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon; - c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon; - c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon; - c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon; - c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon; - c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon; + c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon; + c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon; + c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon; c->h264_idct_add = ff_h264_idct_add_neon; c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S index 0fa4a6b0a5..3d2c6746ae 100644 --- a/libavcodec/arm/h264dsp_neon.S +++ b/libavcodec/arm/h264dsp_neon.S @@ -1592,7 +1592,7 @@ endfunc vdup.8 d1, r5 vmov q2, q8 vmov q3, q8 -1: subs ip, ip, #2 +1: subs r3, r3, #2 vld1.8 {d20-d21},[r0,:128], r2 \macd q2, d0, d20 pld [r0] @@ -1632,7 +1632,7 @@ endfunc vdup.8 d1, r5 vmov q1, q8 vmov q10, q8 -1: subs ip, ip, #2 +1: subs r3, r3, #2 vld1.8 {d4},[r0,:64], r2 \macd q1, d0, d4 pld [r0] @@ -1662,7 +1662,7 @@ endfunc vdup.8 d1, r5 vmov q1, q8 vmov q10, q8 -1: subs ip, ip, #4 +1: subs r3, r3, #4 vld1.32 {d4[0]},[r0,:32], r2 vld1.32 {d4[1]},[r0,:32], r2 \macd q1, d0, d4 @@ -1700,16 +1700,17 @@ endfunc .endm .macro biweight_func w -function biweight_h264_pixels_\w\()_neon +function ff_biweight_h264_pixels_\w\()_neon, export=1 push {r4-r6, lr} - add r4, sp, #16 + ldr r12, [sp, #16] + add r4, sp, #20 ldm r4, {r4-r6} lsr lr, r4, #31 add r6, r6, #1 eors lr, lr, r5, lsr #30 orr r6, r6, #1 - vdup.16 q9, r3 - lsl r6, r6, r3 + vdup.16 q9, r12 + lsl r6, r6, r12 vmvn q9, q9 vdup.16 q8, r6 mov r6, r0 @@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon endfunc .endm - .macro biweight_entry w, h, b=1 -function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1 - mov ip, #\h -.if \b - b biweight_h264_pixels_\w\()_neon -.endif -endfunc - .endm - - biweight_entry 16, 8 - biweight_entry 16, 16, b=0 biweight_func 16 - - biweight_entry 8, 16 - biweight_entry 8, 4 - biweight_entry 8, 8, b=0 biweight_func 8 - - biweight_entry 4, 8 - biweight_entry 4, 2 - biweight_entry 4, 4, b=0 biweight_func 4 @ Weighted prediction .macro weight_16 add - vdup.8 d0, r3 -1: subs ip, ip, #2 + vdup.8 d0, r12 +1: subs r2, r2, #2 vld1.8 {d20-d21},[r0,:128], r1 vmull.u8 q2, d0, d20 pld [r0] @@ -1785,8 +1767,8 @@ endfunc .endm .macro weight_8 add - vdup.8 d0, r3 -1: subs ip, ip, #2 + vdup.8 d0, r12 +1: subs r2, r2, #2 vld1.8 {d4},[r0,:64], r1 vmull.u8 q1, d0, d4 pld [r0] @@ -1806,10 +1788,10 @@ endfunc .endm .macro weight_4 add - vdup.8 d0, r3 + vdup.8 d0, r12 vmov q1, q8 vmov q10, q8 -1: subs ip, ip, #4 +1: subs r2, r2, #4 vld1.32 {d4[0]},[r0,:32], r1 vld1.32 {d4[1]},[r0,:32], r1 vmull.u8 q1, d0, d4 @@ -1842,50 +1824,32 @@ endfunc .endm .macro weight_func w -function weight_h264_pixels_\w\()_neon +function ff_weight_h264_pixels_\w\()_neon, export=1 push {r4, lr} - ldr r4, [sp, #8] - cmp r2, #1 - lsl r4, r4, r2 + ldr r12, [sp, #8] + ldr r4, [sp, #12] + cmp r3, #1 + lsl r4, r4, r3 vdup.16 q8, r4 mov r4, r0 ble 20f - rsb lr, r2, #1 + rsb lr, r3, #1 vdup.16 q9, lr - cmp r3, #0 + cmp r12, #0 blt 10f weight_\w vhadd.s16 -10: rsb r3, r3, #0 +10: rsb r12, r12, #0 weight_\w vhsub.s16 -20: rsb lr, r2, #0 +20: rsb lr, r3, #0 vdup.16 q9, lr - cmp r3, #0 + cmp r12, #0 blt 10f weight_\w vadd.s16 -10: rsb r3, r3, #0 +10: rsb r12, r12, #0 weight_\w vsub.s16 endfunc .endm - .macro weight_entry w, h, b=1 -function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1 - mov ip, #\h -.if \b - b weight_h264_pixels_\w\()_neon -.endif -endfunc - .endm - - weight_entry 16, 8 - weight_entry 16, 16, b=0 weight_func 16 - - weight_entry 8, 16 - weight_entry 8, 4 - weight_entry 8, 8, b=0 weight_func 8 - - weight_entry 4, 8 - weight_entry 4, 2 - weight_entry 4, 4, b=0 weight_func 4 |