diff options
author | Måns Rullgård <mans@mansr.com> | 2010-01-13 16:46:28 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2010-01-13 16:46:28 +0000 |
commit | c816d3d0425b94e95e182d53d94b528244d1f133 (patch) | |
tree | c6add8eb7a684ee6f9bbcd12eaaa32182ecc423d /libavcodec/aactab.c | |
parent | 9d50d3962905b3938e184932f4451e807c0f43fb (diff) | |
download | ffmpeg-c816d3d0425b94e95e182d53d94b528244d1f133.tar.gz |
AAC: Compress codebook tables and optimise sign bit handling
The codebooks each consist of small number of values repeated in
groups of 2 or 4. Storing the codebooks as a packed list of 2- or
4-bit indexes into a table reduces their size substantially (from 7.5k
to 1.5k), resulting in less cache pressure.
For the band types with sign bits in the bitstream, storing the number
and position of non-zero codebook values using a few bits avoids
multiple get_bits() calls and floating-point comparisons which gcc
handles miserably.
Some float/int type punning also avoids gcc brain damage.
Overall speedup 20-35% on Cortex-A8, 20% on Core i7.
Originally committed as revision 21188 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/aactab.c')
-rw-r--r-- | libavcodec/aactab.c | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c index 22a42bed43..bf1fea9946 100644 --- a/libavcodec/aactab.c +++ b/libavcodec/aactab.c @@ -899,6 +899,192 @@ const float * const ff_aac_codebook_vectors[] = { codebook_vector8, codebook_vector10, }; +static const float codebook_vector0_vals[] = { + -1.0000000, 0.0000000, 1.0000000 +}; + +static const float codebook_vector2_vals[] = { + 0.0000000, 1.0000000, 2.5198421, +}; + +/* + * bits 0:1, 2:3, 4:5, 6:7 index into _vals array + * 8:11 number of non-zero values + * 12:15 bit mask of non-zero values + */ +static const uint16_t codebook_vector02_idx[] = { + 0x0000, 0x8140, 0x8180, 0x4110, 0xc250, 0xc290, 0x4120, 0xc260, 0xc2a0, + 0x2104, 0xa244, 0xa284, 0x6214, 0xe354, 0xe394, 0x6224, 0xe364, 0xe3a4, + 0x2108, 0xa248, 0xa288, 0x6218, 0xe358, 0xe398, 0x6228, 0xe368, 0xe3a8, + 0x1101, 0x9241, 0x9281, 0x5211, 0xd351, 0xd391, 0x5221, 0xd361, 0xd3a1, + 0x3205, 0xb345, 0xb385, 0x7315, 0xf455, 0xf495, 0x7325, 0xf465, 0xf4a5, + 0x3209, 0xb349, 0xb389, 0x7319, 0xf459, 0xf499, 0x7329, 0xf469, 0xf4a9, + 0x1102, 0x9242, 0x9282, 0x5212, 0xd352, 0xd392, 0x5222, 0xd362, 0xd3a2, + 0x3206, 0xb346, 0xb386, 0x7316, 0xf456, 0xf496, 0x7326, 0xf466, 0xf4a6, + 0x320a, 0xb34a, 0xb38a, 0x731a, 0xf45a, 0xf49a, 0x732a, 0xf46a, 0xf4aa, +}; + +static const float codebook_vector4_vals[] = { + -6.3496042, -4.3267487, + -2.5198421, -1.0000000, + 0.0000000, 1.0000000, + 2.5198421, 4.3267487, + 6.3496042, +}; + +/* + * bits 0:3, 4:7 index into _vals array + */ +static const uint16_t codebook_vector4_idx[] = { + 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050, 0x0060, 0x0070, 0x0080, + 0x0001, 0x0011, 0x0021, 0x0031, 0x0041, 0x0051, 0x0061, 0x0071, 0x0081, + 0x0002, 0x0012, 0x0022, 0x0032, 0x0042, 0x0052, 0x0062, 0x0072, 0x0082, + 0x0003, 0x0013, 0x0023, 0x0033, 0x0043, 0x0053, 0x0063, 0x0073, 0x0083, + 0x0004, 0x0014, 0x0024, 0x0034, 0x0044, 0x0054, 0x0064, 0x0074, 0x0084, + 0x0005, 0x0015, 0x0025, 0x0035, 0x0045, 0x0055, 0x0065, 0x0075, 0x0085, + 0x0006, 0x0016, 0x0026, 0x0036, 0x0046, 0x0056, 0x0066, 0x0076, 0x0086, + 0x0007, 0x0017, 0x0027, 0x0037, 0x0047, 0x0057, 0x0067, 0x0077, 0x0087, + 0x0008, 0x0018, 0x0028, 0x0038, 0x0048, 0x0058, 0x0068, 0x0078, 0x0088, +}; + +static const float codebook_vector6_vals[] = { + 0.0000000, 1.0000000, 2.5198421, 4.3267487, + 6.3496042, 8.5498797, 10.9027236, 13.3905183, +}; + +/* + * bits 0:3, 4:7 index into _vals array + * 8:11 number of non-zero values + * 12:15 1: only second value non-zero + * 0: other cases + */ +static const uint16_t codebook_vector6_idx[] = { + 0x0000, 0x0110, 0x0120, 0x0130, 0x0140, 0x0150, 0x0160, 0x0170, + 0x1101, 0x0211, 0x0221, 0x0231, 0x0241, 0x0251, 0x0261, 0x0271, + 0x1102, 0x0212, 0x0222, 0x0232, 0x0242, 0x0252, 0x0262, 0x0272, + 0x1103, 0x0213, 0x0223, 0x0233, 0x0243, 0x0253, 0x0263, 0x0273, + 0x1104, 0x0214, 0x0224, 0x0234, 0x0244, 0x0254, 0x0264, 0x0274, + 0x1105, 0x0215, 0x0225, 0x0235, 0x0245, 0x0255, 0x0265, 0x0275, + 0x1106, 0x0216, 0x0226, 0x0236, 0x0246, 0x0256, 0x0266, 0x0276, + 0x1107, 0x0217, 0x0227, 0x0237, 0x0247, 0x0257, 0x0267, 0x0277, +}; + +static const float codebook_vector8_vals[] = { + 0.0000000, 1.0000000, + 2.5198421, 4.3267487, + 6.3496042, 8.5498797, + 10.9027236, 13.3905183, + 16.0000000, 18.7207544, + 21.5443469, 24.4637810, + 27.4731418, +}; + +/* + * bits 0:3, 4:7 index into _vals array + * 8:11 number of non-zero values + * 12:15 1: only second value non-zero + * 0: other cases + */ +static const uint16_t codebook_vector8_idx[] = { + 0x0000, 0x0110, 0x0120, 0x0130, 0x0140, 0x0150, 0x0160, + 0x0170, 0x0180, 0x0190, 0x01a0, 0x01b0, 0x01c0, + 0x1101, 0x0211, 0x0221, 0x0231, 0x0241, 0x0251, 0x0261, + 0x0271, 0x0281, 0x0291, 0x02a1, 0x02b1, 0x02c1, + 0x1102, 0x0212, 0x0222, 0x0232, 0x0242, 0x0252, 0x0262, + 0x0272, 0x0282, 0x0292, 0x02a2, 0x02b2, 0x02c2, + 0x1103, 0x0213, 0x0223, 0x0233, 0x0243, 0x0253, 0x0263, + 0x0273, 0x0283, 0x0293, 0x02a3, 0x02b3, 0x02c3, + 0x1104, 0x0214, 0x0224, 0x0234, 0x0244, 0x0254, 0x0264, + 0x0274, 0x0284, 0x0294, 0x02a4, 0x02b4, 0x02c4, + 0x1105, 0x0215, 0x0225, 0x0235, 0x0245, 0x0255, 0x0265, + 0x0275, 0x0285, 0x0295, 0x02a5, 0x02b5, 0x02c5, + 0x1106, 0x0216, 0x0226, 0x0236, 0x0246, 0x0256, 0x0266, + 0x0276, 0x0286, 0x0296, 0x02a6, 0x02b6, 0x02c6, + 0x1107, 0x0217, 0x0227, 0x0237, 0x0247, 0x0257, 0x0267, + 0x0277, 0x0287, 0x0297, 0x02a7, 0x02b7, 0x02c7, + 0x1108, 0x0218, 0x0228, 0x0238, 0x0248, 0x0258, 0x0268, + 0x0278, 0x0288, 0x0298, 0x02a8, 0x02b8, 0x02c8, + 0x1109, 0x0219, 0x0229, 0x0239, 0x0249, 0x0259, 0x0269, + 0x0279, 0x0289, 0x0299, 0x02a9, 0x02b9, 0x02c9, + 0x110a, 0x021a, 0x022a, 0x023a, 0x024a, 0x025a, 0x026a, + 0x027a, 0x028a, 0x029a, 0x02aa, 0x02ba, 0x02ca, + 0x110b, 0x021b, 0x022b, 0x023b, 0x024b, 0x025b, 0x026b, + 0x027b, 0x028b, 0x029b, 0x02ab, 0x02bb, 0x02cb, + 0x110c, 0x021c, 0x022c, 0x023c, 0x024c, 0x025c, 0x026c, + 0x027c, 0x028c, 0x029c, 0x02ac, 0x02bc, 0x02cc, +}; + +static const float codebook_vector10_vals[] = { + 0.0000000, 1.0000000, + 2.5198421, 4.3267487, + 6.3496042, 8.5498797, + 10.9027236, 13.3905183, + 16.0000000, 18.7207544, + 21.5443469, 24.4637810, + 27.4731418, 30.5673509, + 33.7419917, 36.9931811, +}; + +/* + * bits 0:3, 4:7 index into _vals array + * 8:9 bit mask of escape-coded entries + * 12:15 number of non-zero values + */ +static const uint16_t codebook_vector10_idx[] = { + 0x0000, 0x1010, 0x1020, 0x1030, 0x1040, 0x1050, 0x1060, 0x1070, + 0x1080, 0x1090, 0x10a0, 0x10b0, 0x10c0, 0x10d0, 0x10e0, 0x10f0, 0x1200, + 0x1001, 0x2011, 0x2021, 0x2031, 0x2041, 0x2051, 0x2061, 0x2071, + 0x2081, 0x2091, 0x20a1, 0x20b1, 0x20c1, 0x20d1, 0x20e1, 0x20f1, 0x2201, + 0x1002, 0x2012, 0x2022, 0x2032, 0x2042, 0x2052, 0x2062, 0x2072, + 0x2082, 0x2092, 0x20a2, 0x20b2, 0x20c2, 0x20d2, 0x20e2, 0x20f2, 0x2202, + 0x1003, 0x2013, 0x2023, 0x2033, 0x2043, 0x2053, 0x2063, 0x2073, + 0x2083, 0x2093, 0x20a3, 0x20b3, 0x20c3, 0x20d3, 0x20e3, 0x20f3, 0x2203, + 0x1004, 0x2014, 0x2024, 0x2034, 0x2044, 0x2054, 0x2064, 0x2074, + 0x2084, 0x2094, 0x20a4, 0x20b4, 0x20c4, 0x20d4, 0x20e4, 0x20f4, 0x2204, + 0x1005, 0x2015, 0x2025, 0x2035, 0x2045, 0x2055, 0x2065, 0x2075, + 0x2085, 0x2095, 0x20a5, 0x20b5, 0x20c5, 0x20d5, 0x20e5, 0x20f5, 0x2205, + 0x1006, 0x2016, 0x2026, 0x2036, 0x2046, 0x2056, 0x2066, 0x2076, + 0x2086, 0x2096, 0x20a6, 0x20b6, 0x20c6, 0x20d6, 0x20e6, 0x20f6, 0x2206, + 0x1007, 0x2017, 0x2027, 0x2037, 0x2047, 0x2057, 0x2067, 0x2077, + 0x2087, 0x2097, 0x20a7, 0x20b7, 0x20c7, 0x20d7, 0x20e7, 0x20f7, 0x2207, + 0x1008, 0x2018, 0x2028, 0x2038, 0x2048, 0x2058, 0x2068, 0x2078, + 0x2088, 0x2098, 0x20a8, 0x20b8, 0x20c8, 0x20d8, 0x20e8, 0x20f8, 0x2208, + 0x1009, 0x2019, 0x2029, 0x2039, 0x2049, 0x2059, 0x2069, 0x2079, + 0x2089, 0x2099, 0x20a9, 0x20b9, 0x20c9, 0x20d9, 0x20e9, 0x20f9, 0x2209, + 0x100a, 0x201a, 0x202a, 0x203a, 0x204a, 0x205a, 0x206a, 0x207a, + 0x208a, 0x209a, 0x20aa, 0x20ba, 0x20ca, 0x20da, 0x20ea, 0x20fa, 0x220a, + 0x100b, 0x201b, 0x202b, 0x203b, 0x204b, 0x205b, 0x206b, 0x207b, + 0x208b, 0x209b, 0x20ab, 0x20bb, 0x20cb, 0x20db, 0x20eb, 0x20fb, 0x220b, + 0x100c, 0x201c, 0x202c, 0x203c, 0x204c, 0x205c, 0x206c, 0x207c, + 0x208c, 0x209c, 0x20ac, 0x20bc, 0x20cc, 0x20dc, 0x20ec, 0x20fc, 0x220c, + 0x100d, 0x201d, 0x202d, 0x203d, 0x204d, 0x205d, 0x206d, 0x207d, + 0x208d, 0x209d, 0x20ad, 0x20bd, 0x20cd, 0x20dd, 0x20ed, 0x20fd, 0x220d, + 0x100e, 0x201e, 0x202e, 0x203e, 0x204e, 0x205e, 0x206e, 0x207e, + 0x208e, 0x209e, 0x20ae, 0x20be, 0x20ce, 0x20de, 0x20ee, 0x20fe, 0x220e, + 0x100f, 0x201f, 0x202f, 0x203f, 0x204f, 0x205f, 0x206f, 0x207f, + 0x208f, 0x209f, 0x20af, 0x20bf, 0x20cf, 0x20df, 0x20ef, 0x20ff, 0x220f, + 0x1100, 0x2110, 0x2120, 0x2130, 0x2140, 0x2150, 0x2160, 0x2170, + 0x2180, 0x2190, 0x21a0, 0x21b0, 0x21c0, 0x21d0, 0x21e0, 0x21f0, 0x2300, +}; + +const float *const ff_aac_codebook_vector_vals[] = { + codebook_vector0_vals, codebook_vector0_vals, + codebook_vector2_vals, codebook_vector2_vals, + codebook_vector4_vals, codebook_vector4_vals, + codebook_vector6_vals, codebook_vector6_vals, + codebook_vector8_vals, codebook_vector8_vals, + codebook_vector10_vals, +}; + +const uint16_t *const ff_aac_codebook_vector_idx[] = { + codebook_vector02_idx, codebook_vector02_idx, + codebook_vector02_idx, codebook_vector02_idx, + codebook_vector4_idx, codebook_vector4_idx, + codebook_vector6_idx, codebook_vector6_idx, + codebook_vector8_idx, codebook_vector8_idx, + codebook_vector10_idx, +}; + /* @name swb_offsets * Sample offset into the window indicating the beginning of a scalefactor * window band |