aboutsummaryrefslogtreecommitdiffstats
path: root/ffmpeg.h
diff options
context:
space:
mode:
authorMartin Storsjö <martin@martin.st>2016-11-22 22:58:35 +0200
committerMartin Storsjö <martin@martin.st>2017-03-11 13:14:25 +0200
commit9532a7d4d060f2a2741225a76945daed52dbc478 (patch)
treec79b4c2db06447907fb34ce7c7d031c13f03305a /ffmpeg.h
parent824589556cb7c4bfafb8a0190e71a10c628f5339 (diff)
downloadffmpeg-9532a7d4d060f2a2741225a76945daed52dbc478.tar.gz
aarch64: vp9itxfm: Do separate functions for half/quarter idct16 and idct32
This work is sponsored by, and copyright, Google. This avoids loading and calculating coefficients that we know will be zero, and avoids filling the temp buffer with zeros in places where we know the second pass won't read. This gives a pretty substantial speedup for the smaller subpartitions. The code size increases from 14740 bytes to 24292 bytes. The idct16/32_end macros are moved above the individual functions; the instructions themselves are unchanged, but since new functions are added at the same place where the code is moved from, the diff looks rather messy. Before: vp9_inv_dct_dct_16x16_sub1_add_neon: 236.7 vp9_inv_dct_dct_16x16_sub2_add_neon: 1051.0 vp9_inv_dct_dct_16x16_sub4_add_neon: 1051.0 vp9_inv_dct_dct_16x16_sub8_add_neon: 1051.0 vp9_inv_dct_dct_16x16_sub12_add_neon: 1387.4 vp9_inv_dct_dct_16x16_sub16_add_neon: 1387.6 vp9_inv_dct_dct_32x32_sub1_add_neon: 554.1 vp9_inv_dct_dct_32x32_sub2_add_neon: 5198.5 vp9_inv_dct_dct_32x32_sub4_add_neon: 5198.6 vp9_inv_dct_dct_32x32_sub8_add_neon: 5196.3 vp9_inv_dct_dct_32x32_sub12_add_neon: 6183.4 vp9_inv_dct_dct_32x32_sub16_add_neon: 6174.3 vp9_inv_dct_dct_32x32_sub20_add_neon: 7151.4 vp9_inv_dct_dct_32x32_sub24_add_neon: 7145.3 vp9_inv_dct_dct_32x32_sub28_add_neon: 8119.3 vp9_inv_dct_dct_32x32_sub32_add_neon: 8118.7 After: vp9_inv_dct_dct_16x16_sub1_add_neon: 236.7 vp9_inv_dct_dct_16x16_sub2_add_neon: 640.8 vp9_inv_dct_dct_16x16_sub4_add_neon: 639.0 vp9_inv_dct_dct_16x16_sub8_add_neon: 842.0 vp9_inv_dct_dct_16x16_sub12_add_neon: 1388.3 vp9_inv_dct_dct_16x16_sub16_add_neon: 1389.3 vp9_inv_dct_dct_32x32_sub1_add_neon: 554.1 vp9_inv_dct_dct_32x32_sub2_add_neon: 3685.5 vp9_inv_dct_dct_32x32_sub4_add_neon: 3685.1 vp9_inv_dct_dct_32x32_sub8_add_neon: 3684.4 vp9_inv_dct_dct_32x32_sub12_add_neon: 5312.2 vp9_inv_dct_dct_32x32_sub16_add_neon: 5315.4 vp9_inv_dct_dct_32x32_sub20_add_neon: 7154.9 vp9_inv_dct_dct_32x32_sub24_add_neon: 7154.5 vp9_inv_dct_dct_32x32_sub28_add_neon: 8126.6 vp9_inv_dct_dct_32x32_sub32_add_neon: 8127.2 This is cherrypicked from libav commit a63da4511d0fee66695ff4afd264ba1dbf1e812d. Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'ffmpeg.h')
0 files changed, 0 insertions, 0 deletions