author     Kostya Shishkov <kostya.shishkov@gmail.com>  2020-02-20 11:00:24 +0100
committer  Kostya Shishkov <kostya.shishkov@gmail.com>  2020-02-20 11:00:24 +0100
commit     b4d5b8515e75383b4fc59ea2813c90c615d59a96 (patch)
tree       cf9ea1f458965eea90dff60a607dc90bf42887b3 /nihav-codec-support/src/dsp
parent     2b8bf9a03242bbd6e80091082a50ec13b1a95143 (diff)
download   nihav-b4d5b8515e75383b4fc59ea2813c90c615d59a96.tar.gz
split nihav-codec-support crate from nihav-core
The former is intended just for NihAV decoders, while the latter is for both
NihAV crates and for code that uses NihAV.
Diffstat (limited to 'nihav-codec-support/src/dsp')
-rw-r--r--  nihav-codec-support/src/dsp/dct.rs     512
-rw-r--r--  nihav-codec-support/src/dsp/fft.rs     912
-rw-r--r--  nihav-codec-support/src/dsp/mdct.rs     81
-rw-r--r--  nihav-codec-support/src/dsp/mod.rs      11
-rw-r--r--  nihav-codec-support/src/dsp/window.rs   59
5 files changed, 1575 insertions, 0 deletions
diff --git a/nihav-codec-support/src/dsp/dct.rs b/nihav-codec-support/src/dsp/dct.rs new file mode 100644 index 0000000..2a74449 --- /dev/null +++ b/nihav-codec-support/src/dsp/dct.rs @@ -0,0 +1,512 @@ +//! Discrete 1-D cosine and sine transforms. +use std::f32::consts; + +/// A list of DCT and DST modes. +#[allow(non_camel_case_types)] +#[derive(Clone,Copy,Debug,PartialEq)] +pub enum DCTMode { + DCT_I, + DCT_II, + DCT_III, + DCT_IV, + DST_I, + DST_II, + DST_III, + DST_IV, +} + +/// DCT/DST working context. +#[allow(dead_code)] +pub struct DCT { + tmp: Vec<f32>, + tab: Vec<f32>, + swaps: Vec<usize>, + perms: Vec<usize>, + mode: DCTMode, + size: usize, + is_pow2: bool, + perm_tab: Vec<usize>, +} + +impl DCT { + /// Constructs a new context for the selected DCT or DST operation. + pub fn new(mode: DCTMode, size: usize) -> Self { + let bits = 31 - (size as u32).leading_zeros(); + let is_pow2 = (size & (size - 1)) == 0; + let mut tmp: Vec<f32>; + let mut swaps: Vec<usize> = Vec::new(); + let mut perms: Vec<usize>; + let mut perm_tab: Vec<usize>; + tmp = Vec::with_capacity(size); + tmp.resize(size, 0.0); + if !is_pow2 { + perms = Vec::new(); + perm_tab = Vec::new(); + } else { + perms = Vec::with_capacity(size); + for i in 0..size { perms.push(swp_idx(i, bits)); } + gen_swaps_for_perm(&mut swaps, &perms); + + perm_tab = Vec::with_capacity(size); + perm_tab.push(0); // padding + perm_tab.push(0); // size = 1 + perm_tab.push(0); // size = 2 + perm_tab.push(1); + for blen in 2..=bits { + let ssize = 1 << blen; + for i in 0..ssize { perm_tab.push(swp_idx(i, blen)); } + } + } + let mut tab: Vec<f32>; + match mode { + DCTMode::DCT_II | + DCTMode::DST_II | + DCTMode::DCT_III | + DCTMode::DST_III | + DCTMode::DCT_IV => { + tab = Vec::with_capacity(size * 2); + tab.push(1.0); // padding + tab.push(0.0); + tab.push((consts::PI / 8.0).sin()); // size = 1 + tab.push((consts::PI / 8.0).cos()); + if bits > 1 { + for blen in 1..=bits { + let tsize = 1 << blen; + let base = consts::PI / ((tsize * 8) as f32); + for i in 0..tsize { + let phi = ((i * 2 + 1) as f32) * base; + tab.push(phi.sin()); + tab.push(phi.cos()); + } + } + } + }, +/* DCTMode::DST_IV => { + },*/ + _ => { tab = Vec::new(); }, + }; + + Self { tmp, tab, mode, size, swaps, perms, is_pow2, perm_tab } + } + fn can_do_fast(&mut self) -> bool { + if !self.is_pow2 { return false; } + match self.mode { + DCTMode::DCT_I | DCTMode::DST_I | DCTMode::DST_IV => false, + _ => true, + } + } + fn inplace_fast_dct(&mut self, dst: &mut [f32]) { + match self.mode { + DCTMode::DCT_II => { + dct_II_inplace(dst, self.size, 1, &self.tab, &self.perm_tab); + }, + DCTMode::DST_II => { + dst_II_inplace(dst, self.size, 1, &self.tab, &self.perm_tab); + }, + DCTMode::DCT_III => { + dct_III_inplace(dst, self.size, 1, &self.tab, &self.perm_tab); + }, + DCTMode::DST_III => { + dst_III_inplace(dst, self.size, 1, &self.tab, &self.perm_tab); + }, + DCTMode::DCT_IV => { + dct_IV_inplace(dst, self.size, 1, &self.tab, &self.perm_tab); + }, + _ => unreachable!(), + }; + } + /// Performs DCT/DST. + pub fn do_dct(&mut self, src: &[f32], dst: &mut [f32]) { + if self.can_do_fast() { + for (i, ni) in self.perms.iter().enumerate() { dst[i] = src[*ni]; } + self.inplace_fast_dct(dst); + } else { + do_ref_dct(self.mode, src, dst, self.size); + } + } + /// Performs inplace DCT/DST. 
+ pub fn do_dct_inplace(&mut self, buf: &mut [f32]) { + if self.can_do_fast() { + swap_buf(buf, &self.swaps); + self.inplace_fast_dct(buf); + } else { + self.tmp.copy_from_slice(&buf[0..self.size]); + do_ref_dct(self.mode, &self.tmp, buf, self.size); + } + } + /// Returns the scale for output normalisation. + pub fn get_scale(&self) -> f32 { + let fsize = self.size as f32; + match self.mode { + DCTMode::DCT_I => 2.0 / (fsize - 1.0), + DCTMode::DST_I => 2.0 / (fsize + 1.0), + DCTMode::DCT_II => 1.0, + DCTMode::DCT_III=> 1.0, + DCTMode::DST_II => 1.0, + DCTMode::DST_III=> 1.0, + DCTMode::DCT_IV => 1.0, + _ => 2.0 / fsize, + } + } +} + +fn reverse_bits(inval: u32) -> u32 { + const REV_TAB: [u8; 16] = [ + 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110, + 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111, + ]; + + let mut ret = 0; + let mut val = inval; + for _ in 0..8 { + ret = (ret << 4) | (REV_TAB[(val & 0xF) as usize] as u32); + val >>= 4; + } + ret +} + +fn swp_idx(idx: usize, bits: u32) -> usize { + let s = reverse_bits(idx as u32) as usize; + s >> (32 - bits) +} + +fn gen_swaps_for_perm(swaps: &mut Vec<usize>, perms: &[usize]) { + let mut idx_arr: Vec<usize> = Vec::with_capacity(perms.len()); + for i in 0..perms.len() { idx_arr.push(i); } + let mut run_size = 0; + let mut run_pos = 0; + for idx in 0..perms.len() { + if perms[idx] == idx_arr[idx] { + if run_size == 0 { run_pos = idx; } + run_size += 1; + } else { + for i in 0..run_size { + swaps.push(run_pos + i); + } + run_size = 0; + let mut spos = idx + 1; + while idx_arr[spos] != perms[idx] { spos += 1; } + idx_arr[spos] = idx_arr[idx]; + idx_arr[idx] = perms[idx]; + swaps.push(spos); + } + } +} + +fn swap_buf(buf: &mut [f32], swaps: &[usize]) { + for (idx, nidx) in swaps.iter().enumerate() { + if idx != *nidx { + buf.swap(*nidx, idx); + } + } +} + +fn do_ref_dct(mode: DCTMode, src: &[f32], dst: &mut [f32], size: usize) { + match mode { + DCTMode::DCT_I => dct_I_ref(src, dst, size), + DCTMode::DST_I => dst_I_ref(src, dst, size), + DCTMode::DCT_II => dct_II_ref(src, dst, size), + DCTMode::DST_II => dst_II_ref(src, dst, size), + DCTMode::DCT_III => dct_III_ref(src, dst, size), + DCTMode::DST_III => dst_III_ref(src, dst, size), + DCTMode::DCT_IV => dct_IV_ref(src, dst, size), + DCTMode::DST_IV => dst_IV_ref(src, dst, size), + }; +} + +#[allow(non_snake_case)] +fn dct_I_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / ((size - 1) as f32); + for k in 0..size { + let mut sum = (src[0] + (if (k & 1) != 0 { -src[size - 1] } else { src[size - 1] })) * 0.5; + for n in 1..size-1 { + sum += src[n] * (base * ((n * k) as f32)).cos(); + } + dst[k] = sum; + } +} + +#[allow(non_snake_case)] +fn dst_I_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / ((size + 1) as f32); + for k in 0..size { + let mut sum = 0.0; + for n in 0..size { + sum += src[n] * (base * (((n + 1) * (k + 1)) as f32)).sin(); + } + dst[k] = sum; + } +} + +#[allow(non_snake_case)] +fn dct_II_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / (size as f32); + for k in 0..size { + let mut sum = 0.0; + for n in 0..size { + sum += src[n] * (base * ((n as f32) + 0.5) * (k as f32)).cos(); + } + dst[k] = sum * (if k == 0 { (1.0 / (size as f32)).sqrt() } else { (2.0 / (size as f32)).sqrt() }); + } +} + +#[allow(non_snake_case)] +fn dst_II_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / (size as f32); + for k in 0..size { + let mut sum = 0.0; + let kmod = (k + 1) 
as f32; + for n in 0..size { + sum += src[n] * (base * ((n as f32) + 0.5) * kmod).sin(); + } + dst[k] = sum * (2.0 / (size as f32)).sqrt(); + } + dst[size - 1] /= consts::SQRT_2; +} + +#[allow(non_snake_case)] +fn dct_III_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / (size as f32); + for k in 0..size { + let mut sum = src[0] / consts::SQRT_2; + let kmod = (k as f32) + 0.5; + for n in 1..size { + sum += src[n] * (base * (n as f32) * kmod).cos(); + } + dst[k] = sum * (2.0 / (size as f32)).sqrt(); + } +} + +#[allow(non_snake_case)] +fn dst_III_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / (size as f32); + for k in 0..size { + let mut sum = 0.0; + let kmod = (k as f32) + 0.5; + for n in 0..size-1 { + sum += src[n] * (base * ((n + 1) as f32) * kmod).sin(); + } + sum += src[size - 1] / consts::SQRT_2 * (base * (size as f32) * kmod).sin(); + dst[k] = sum * (2.0 / (size as f32)).sqrt(); + } +} + +#[allow(non_snake_case)] +fn dct_IV_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / (size as f32); + for k in 0..size { + let mut sum = 0.0; + let kmod = (k as f32) + 0.5; + for n in 0..size { + sum += src[n] * (base * ((n as f32) + 0.5) * kmod).cos(); + } + dst[k] = sum; + } +} + +#[allow(non_snake_case)] +fn dst_IV_ref(src: &[f32], dst: &mut [f32], size: usize) { + let base = consts::PI / (size as f32); + for k in 0..size { + let mut sum = 0.0; + let kmod = (k as f32) + 0.5; + for n in 0..size { + sum += src[n] * (base * ((n as f32) + 0.5) * kmod).sin(); + } + dst[k] = sum; + } +} + +const DCT_II_C0: f32 = 0.65328148243818826393; // cos(1*PI/8) / sqrt(2) +const DCT_II_C1: f32 = 0.27059805007309849220; // cos(3*PI/8) / sqrt(2) + +#[allow(non_snake_case)] +fn dct_II_inplace(buf: &mut [f32], size: usize, step: usize, tab: &[f32], perm_tab: &[usize]) { + match size { + 0 | 1 => {}, + 2 => { + let i0 = buf[0]; + let i1 = buf[step]; + buf[0] = (i0 + i1) / consts::SQRT_2; + buf[step] = (i0 - i1) / consts::SQRT_2; + }, + 4 => { + let i0 = buf[0 * step]; + let i1 = buf[2 * step]; + let i2 = buf[1 * step]; + let i3 = buf[3 * step]; + let t0 = (i0 + i3) * 0.5; + let t1 = (i1 + i2) * 0.5; + buf[0 * step] = t0 + t1; + buf[2 * step] = t0 - t1; + let t0 = i0 - i3; + let t1 = i1 - i2; + buf[1 * step] = DCT_II_C0 * t0 + DCT_II_C1 * t1; + buf[3 * step] = DCT_II_C1 * t0 - DCT_II_C0 * t1; + }, + _ => { + let hsize = size >> 1; + for i in 0..hsize { + let i0 = buf[i * step]; + let i1 = buf[(size - 1 - i) * step]; + if (i & 1) == 0 { + buf[i * step] = (i0 + i1) / consts::SQRT_2; + buf[(size - 1 - i) * step] = (i0 - i1) / consts::SQRT_2; + } else { + buf[i * step] = (i1 - i0) / consts::SQRT_2; + buf[(size - 1 - i) * step] = (i1 + i0) / consts::SQRT_2; + } + } + dct_II_inplace(buf, hsize, step * 2, tab, perm_tab); + dct_II_part2_inplace(&mut buf[step..], hsize, step * 2, tab, perm_tab); + }, + }; +} + +#[allow(non_snake_case)] +fn dct_II_part2_inplace(buf: &mut [f32], size: usize, step: usize, tab: &[f32], perm_tab: &[usize]) { + let hsize = size >> 1; +// todo optimise for size = 4 + for i in 0..hsize { + let i0 = buf[perm_tab[size + i] * step]; + let i1 = buf[perm_tab[size + size - i - 1] * step]; + let c0 = tab[size + i * 2 + 0]; + let c1 = tab[size + i * 2 + 1]; + buf[perm_tab[size + i] * step] = c0 * i0 + c1 * i1; + buf[perm_tab[size + size - i - 1] * step] = c0 * i1 - c1 * i0; + } + + dct_II_inplace(buf, hsize, step * 2, tab, perm_tab); + dst_II_inplace(&mut buf[step..], hsize, step * 2, tab, perm_tab); + + buf[(size - 1) * step] = 
-buf[(size - 1) * step]; + for i in 1..hsize { + let (i0, i1) = if (i & 1) == 0 { + (buf[i * step * 2], -buf[i * step * 2 - step]) + } else { + (buf[i * step * 2], buf[i * step * 2 - step]) + }; + buf[i * step * 2 - step] = (i0 + i1) / consts::SQRT_2; + buf[i * step * 2] = (i0 - i1) / consts::SQRT_2; + } +} + +#[allow(non_snake_case)] +fn dst_II_inplace(buf: &mut [f32], size: usize, step: usize, tab: &[f32], perm_tab: &[usize]) { + if size <= 1 { return; } + let hsize = size >> 1; + for i in hsize..size { buf[i * step] = -buf[i * step]; } + dct_II_inplace(buf, size, step, tab, perm_tab); + for i in 0..hsize { + let idx0 = i * step; + let idx1 = (size - 1 - i) * step; + buf.swap(idx0, idx1); + } +} + +#[allow(non_snake_case)] +fn dct_III_inplace(buf: &mut [f32], size: usize, step: usize, tab: &[f32], perm_tab: &[usize]) { + if size <= 1 { return; } + let hsize = size >> 1; + dct_III_inplace(buf, hsize, step, tab, perm_tab); + dct_IV_inplace(&mut buf[step*hsize..], hsize, step, tab, perm_tab); + for i in 0..(size >> 2) { + buf.swap((size - 1 - i) * step, (hsize + i) * step); + } + for i in 0..hsize { + let i0 = buf[i * step] / consts::SQRT_2; + let i1 = buf[(size-i-1) * step] / consts::SQRT_2; + buf[i * step] = i0 + i1; + buf[(size-i-1) * step] = i0 - i1; + } +} + +#[allow(non_snake_case)] +fn dst_III_inplace(buf: &mut [f32], size: usize, step: usize, tab: &[f32], perm_tab: &[usize]) { + if size <= 1 { return; } + let hsize = size >> 1; + for i in 0..hsize { + let idx0 = i * step; + let idx1 = (size - 1 - i) * step; + buf.swap(idx0, idx1); + } + dct_III_inplace(buf, size, step, tab, perm_tab); + for i in 0..hsize { buf[i * 2 * step + step] = -buf[i * 2 * step + step]; } +} + +#[allow(non_snake_case)] +fn dct_IV_inplace(buf: &mut [f32], size: usize, step: usize, tab: &[f32], perm_tab: &[usize]) { + if size <= 1 { return; } + let hsize = size >> 1; + + for i in 0..hsize { + let idx0 = perm_tab[size + i]; + let idx1 = size - 1 - idx0; + let i0 = buf[idx0 * step]; + let i1 = buf[idx1 * step]; + let c0 = tab[size + i * 2 + 1]; + let c1 = tab[size + i * 2 + 0]; + buf[idx0 * step] = c0 * i0 + c1 * i1; + buf[idx1 * step] = c0 * i1 - c1 * i0; + } + for i in (hsize+1..size).step_by(2) { + buf[i] = -buf[i]; + } + dct_II_inplace(buf, hsize, step * 2, tab, perm_tab); + dct_II_inplace(&mut buf[step..], hsize, step * 2, tab, perm_tab); + for i in 0..(size >> 2) { + buf.swap((size - 1 - i * 2) * step, (i * 2 + 1) * step); + } + for i in (3..size).step_by(4) { + buf[i] = -buf[i]; + } + buf[0] *= consts::SQRT_2; + buf[(size - 1) * step] *= -consts::SQRT_2; + for i in 0..hsize-1 { + let i0 = buf[(i * 2 + 2) * step]; + let i1 = buf[(i * 2 + 1) * step]; + buf[(i * 2 + 2) * step] = i0 + i1; + buf[(i * 2 + 1) * step] = i0 - i1; + } + for i in 0..size { + buf[i * step] /= consts::SQRT_2; + } +} + +#[cfg(test)] +mod test { + use super::*; + + fn test_pair(mode: DCTMode, invmode: DCTMode, size: usize) { + println!("testing {:?} -> {:?}", mode, invmode); + let mut fin: Vec<f32> = Vec::with_capacity(size); + let mut out: Vec<f32> = Vec::with_capacity(size); + out.resize(size, 0.0); + let mut seed: u32 = 42; + for _ in 0..size { + seed = seed.wrapping_mul(1664525).wrapping_add(1013904223); + let val = (seed >> 16) as i16; + fin.push((val as f32) / 256.0); + } + let mut dct = DCT::new(mode, size); + dct.do_dct(&fin, &mut out); + let mut dct = DCT::new(invmode, size); + dct.do_dct_inplace(&mut out); + + let scale = dct.get_scale(); + for i in 0..fin.len() { + assert!((fin[i] - out[i]*scale).abs() < 1.0e-2); + } + } + 
#[test] + fn test_dct() { + test_pair(DCTMode::DCT_I, DCTMode::DCT_I, 32); + test_pair(DCTMode::DST_I, DCTMode::DST_I, 32); + test_pair(DCTMode::DCT_II, DCTMode::DCT_III, 32); + test_pair(DCTMode::DST_II, DCTMode::DST_III, 32); + test_pair(DCTMode::DCT_III, DCTMode::DCT_II, 32); + test_pair(DCTMode::DST_III, DCTMode::DST_II, 32); + test_pair(DCTMode::DCT_IV, DCTMode::DCT_IV, 32); + test_pair(DCTMode::DST_IV, DCTMode::DST_IV, 32); + } +} diff --git a/nihav-codec-support/src/dsp/fft.rs b/nihav-codec-support/src/dsp/fft.rs new file mode 100644 index 0000000..4629f75 --- /dev/null +++ b/nihav-codec-support/src/dsp/fft.rs @@ -0,0 +1,912 @@ +//! FFT and RDFT implementation. +use std::f32::{self, consts}; +use std::ops::{Not, Neg, Add, AddAssign, Sub, SubAssign, Mul, MulAssign}; +use std::fmt; + +/// Complex number. +#[repr(C)] +#[derive(Debug,Clone,Copy,PartialEq)] +pub struct FFTComplex { + /// Real part of the numner. + pub re: f32, + /// Complex part of the number. + pub im: f32, +} + +impl FFTComplex { + /// Calculates `exp(i * val)`. + pub fn exp(val: f32) -> Self { + FFTComplex { re: val.cos(), im: val.sin() } + } + /// Returns `-Im + i * Re`. + pub fn rotate(self) -> Self { + FFTComplex { re: -self.im, im: self.re } + } + /// Multiplies complex number by scalar. + pub fn scale(self, scale: f32) -> Self { + FFTComplex { re: self.re * scale, im: self.im * scale } + } +} + +impl Neg for FFTComplex { + type Output = FFTComplex; + fn neg(self) -> Self::Output { + FFTComplex { re: -self.re, im: -self.im } + } +} + +impl Not for FFTComplex { + type Output = FFTComplex; + fn not(self) -> Self::Output { + FFTComplex { re: self.re, im: -self.im } + } +} + +impl Add for FFTComplex { + type Output = FFTComplex; + fn add(self, other: Self) -> Self::Output { + FFTComplex { re: self.re + other.re, im: self.im + other.im } + } +} + +impl AddAssign for FFTComplex { + fn add_assign(&mut self, other: Self) { + self.re += other.re; + self.im += other.im; + } +} + +impl Sub for FFTComplex { + type Output = FFTComplex; + fn sub(self, other: Self) -> Self::Output { + FFTComplex { re: self.re - other.re, im: self.im - other.im } + } +} + +impl SubAssign for FFTComplex { + fn sub_assign(&mut self, other: Self) { + self.re -= other.re; + self.im -= other.im; + } +} + +impl Mul for FFTComplex { + type Output = FFTComplex; + fn mul(self, other: Self) -> Self::Output { + FFTComplex { re: self.re * other.re - self.im * other.im, + im: self.im * other.re + self.re * other.im } + } +} + +impl MulAssign for FFTComplex { + fn mul_assign(&mut self, other: Self) { + let re = self.re * other.re - self.im * other.im; + let im = self.im * other.re + self.re * other.im; + self.re = re; + self.im = im; + } +} + +impl fmt::Display for FFTComplex { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "({}, {})", self.re, self.im) + } +} + +/// Complex number with zero value. +pub const FFTC_ZERO: FFTComplex = FFTComplex { re: 0.0, im: 0.0 }; + +/// Calculates forward or inverse FFT in the straightforward way. 
+pub fn generic_fft(data: &mut [FFTComplex], forward: bool) { + let mut tmp = Vec::with_capacity(data.len()); + tmp.resize(data.len(), FFTC_ZERO); + let base = if forward { -consts::PI * 2.0 / (data.len() as f32) } + else { consts::PI * 2.0 / (data.len() as f32) }; + for k in 0..data.len() { + let mut sum = FFTC_ZERO; + for n in 0..data.len() { + let w = FFTComplex::exp(base * ((n * k) as f32)); + sum += data[n] * w; + } + tmp[k] = sum; + } + for k in 0..data.len() { + data[k] = tmp[k]; + } +} + +struct FFTData { + table: Vec<FFTComplex>, + tmp: Vec<FFTComplex>, + twiddle: Vec<FFTComplex>, + size: usize, + step: usize, + div: usize, +} + +struct FFTGeneric {} + +const FFT3_CONST: f32 = 0.86602540378443864677; +const FFT5_CONST1: FFTComplex = FFTComplex { re: 0.80901699437494742410, im: 0.58778525229247312915 }; +const FFT5_CONST2: FFTComplex = FFTComplex { re: 0.30901699437494742411, im: 0.95105651629515357211 }; + +fn twiddle5(a: FFTComplex, b: FFTComplex, c: FFTComplex) -> (FFTComplex, FFTComplex) { + let re = a.re * c.re; + let im = a.im * c.re; + let diffre = b.im * c.im; + let diffim = b.re * c.im; + + (FFTComplex { re: re - diffre, im: im + diffim }, FFTComplex { re: re + diffre, im: im - diffim }) +} + +impl FFTGeneric { + fn new_data(size: usize, forward: bool) -> FFTData { + let mut table: Vec<FFTComplex> = Vec::with_capacity(size * size); + table.resize(size * size, FFTC_ZERO); + let base = consts::PI * 2.0 / (size as f32); + if forward { + for n in 0..size { + for k in 0..size { + table[n * size + k] = FFTComplex::exp(-base * ((n * k) as f32)); + } + } + } else { + for n in 0..size { + for k in 0..size { + table[n * size + k] = FFTComplex::exp( base * ((n * k) as f32)); + } + } + } + let mut tmp = Vec::with_capacity(size); + tmp.resize(size, FFTC_ZERO); + FFTData { table, tmp, twiddle: Vec::new(), size, step: 0, div: 0 } + } + fn fft(tbl: &mut FFTData, size: usize, data: &mut [FFTComplex], step: usize) { + if size == 3 { + let s0 = data[step * 0]; + let s1 = data[step * 1]; + let s2 = data[step * 2]; + let t0 = s1 + s2; + data[step * 0] += t0; + let t1 = s0 - t0.scale(0.5); + let t2 = (s2 - s1).rotate().scale(FFT3_CONST); + data[step * 1] = t1 + t2; + data[step * 2] = t1 - t2; + return; + } + if size == 5 { + let s0 = data[step * 0]; + let s1 = data[step * 1]; + let s2 = data[step * 2]; + let s3 = data[step * 3]; + let s4 = data[step * 4]; + + let t0 = s1 + s4; + let t1 = s1 - s4; + let t2 = s2 + s3; + let t3 = s2 - s3; + let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2); + let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1); + let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1); + let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2); + + data[step * 0] = s0 + t0 + t2; + data[step * 1] = s0 + t5 - t6; + data[step * 2] = s0 - t8 + ta; + data[step * 3] = s0 - t9 + tb; + data[step * 4] = s0 + t4 - t7; + return; + } + for k in 0..tbl.size { + tbl.tmp[k] = FFTC_ZERO; + for n in 0..tbl.size { + tbl.tmp[k] += data[n * step] * tbl.table[k * tbl.size + n]; + } + } + for n in 0..tbl.size { + data[n * step] = tbl.tmp[n]; + } + } + fn ifft(tbl: &mut FFTData, size: usize, data: &mut [FFTComplex], step: usize) { + if size == 3 { + let s0 = data[step * 0]; + let s1 = data[step * 1]; + let s2 = data[step * 2]; + let t0 = s1 + s2; + data[step * 0] += t0; + let t1 = s0 - t0.scale(0.5); + let t2 = (s2 - s1).rotate().scale(FFT3_CONST); + data[step * 1] = t1 - t2; + data[step * 2] = t1 + t2; + return; + } + if size == 5 { + let s0 = data[step * 0]; + let s1 = data[step * 1]; + let s2 = data[step * 2]; + let s3 = data[step * 
3]; + let s4 = data[step * 4]; + + let t0 = s1 + s4; + let t1 = s1 - s4; + let t2 = s2 + s3; + let t3 = s2 - s3; + let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2); + let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1); + let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1); + let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2); + + data[step * 0] = s0 + t0 + t2; + data[step * 1] = s0 + t4 - t7; + data[step * 2] = s0 - t9 + tb; + data[step * 3] = s0 - t8 + ta; + data[step * 4] = s0 + t5 - t6; + return; + } + Self::fft(tbl, size, data, step); + } +} + +struct FFTSplitRadix {} + +impl FFTSplitRadix { + fn new_data(bits: u8, _forward: bool) -> FFTData { + let size = 1 << bits; + let mut table = Vec::with_capacity(size); + for _ in 0..4 { table.push(FFTC_ZERO); } + for b in 3..=bits { + let qsize = (1 << (b - 2)) as usize; + let base = -consts::PI / ((qsize * 2) as f32); + for k in 0..qsize { + table.push(FFTComplex::exp(base * ((k * 1) as f32))); + table.push(FFTComplex::exp(base * ((k * 3) as f32))); + } + } + FFTData { table, tmp: Vec::new(), twiddle: Vec::new(), size, step: 0, div: 0 } + } + fn fft(fftdata: &mut FFTData, bits: u8, data: &mut [FFTComplex]) { + if bits == 0 { return; } + if bits == 1 { + let sum01 = data[0] + data[1]; + let dif01 = data[0] - data[1]; + data[0] = sum01; + data[1] = dif01; + return; + } + if bits == 2 { + let sum01 = data[0] + data[2]; + let dif01 = data[0] - data[2]; + let sum23 = data[1] + data[3]; + let dif23 = data[1] - data[3]; + data[0] = sum01 + sum23; + data[1] = dif01 - dif23.rotate(); + data[2] = sum01 - sum23; + data[3] = dif01 + dif23.rotate(); + return; + } + let qsize = (1 << (bits - 2)) as usize; + let hsize = (1 << (bits - 1)) as usize; + let q3size = qsize + hsize; + + Self::fft(fftdata, bits - 1, &mut data[0 ..hsize]); + Self::fft(fftdata, bits - 2, &mut data[hsize ..q3size]); + Self::fft(fftdata, bits - 2, &mut data[q3size..]); + let off = hsize; + { + let t3 = data[0 + hsize] + data[0 + q3size]; + let t4 = (data[0 + hsize] - data[0 + q3size]).rotate(); + let e1 = data[0]; + let e2 = data[0 + qsize]; + data[0] = e1 + t3; + data[0 + qsize] = e2 - t4; + data[0 + hsize] = e1 - t3; + data[0 + q3size] = e2 + t4; + } + for k in 1..qsize { + let t1 = fftdata.table[off + k * 2 + 0] * data[k + hsize]; + let t2 = fftdata.table[off + k * 2 + 1] * data[k + q3size]; + let t3 = t1 + t2; + let t4 = (t1 - t2).rotate(); + let e1 = data[k]; + let e2 = data[k + qsize]; + data[k] = e1 + t3; + data[k + qsize] = e2 - t4; + data[k + hsize] = e1 - t3; + data[k + qsize * 3] = e2 + t4; + } + } + fn ifft(fftdata: &mut FFTData, bits: u8, data: &mut [FFTComplex]) { + if bits == 0 { return; } + if bits == 1 { + let sum01 = data[0] + data[1]; + let dif01 = data[0] - data[1]; + data[0] = sum01; + data[1] = dif01; + return; + } + if bits == 2 { + let sum01 = data[0] + data[2]; + let dif01 = data[0] - data[2]; + let sum23 = data[1] + data[3]; + let dif23 = data[1] - data[3]; + data[0] = sum01 + sum23; + data[1] = dif01 + dif23.rotate(); + data[2] = sum01 - sum23; + data[3] = dif01 - dif23.rotate(); + return; + } + let qsize = (1 << (bits - 2)) as usize; + let hsize = (1 << (bits - 1)) as usize; + let q3size = qsize + hsize; + + Self::ifft(fftdata, bits - 1, &mut data[0 ..hsize]); + Self::ifft(fftdata, bits - 2, &mut data[hsize ..q3size]); + Self::ifft(fftdata, bits - 2, &mut data[q3size..]); + let off = hsize; + { + let t3 = data[0 + hsize] + data[0 + q3size]; + let t4 = (data[0 + hsize] - data[0 + q3size]).rotate(); + let e1 = data[0]; + let e2 = data[0 + qsize]; + data[0] = e1 + t3; + data[0 + 
qsize] = e2 + t4; + data[0 + hsize] = e1 - t3; + data[0 + q3size] = e2 - t4; + } + for k in 1..qsize { + let t1 = !fftdata.table[off + k * 2 + 0] * data[k + hsize]; + let t2 = !fftdata.table[off + k * 2 + 1] * data[k + q3size]; + let t3 = t1 + t2; + let t4 = (t1 - t2).rotate(); + let e1 = data[k]; + let e2 = data[k + qsize]; + data[k] = e1 + t3; + data[k + qsize] = e2 + t4; + data[k + hsize] = e1 - t3; + data[k + qsize * 3] = e2 - t4; + } + } +} + +struct FFT15 {} + +const FFT15_INSWAP: [usize; 20] = [ 0, 5, 10, 42, 3, 8, 13, 42, 6, 11, 1, 42, 9, 14, 4, 42, 12, 2, 7, 42 ]; +const FFT15_OUTSWAP: [usize; 20] = [ 0, 10, 5, 42, 6, 1, 11, 42, 12, 7, 2, 42, 3, 13, 8, 42, 9, 4, 14, 42 ]; + +impl FFT15 { + fn new_data(size: usize, _forward: bool) -> FFTData { + FFTData { table: Vec::new(), tmp: Vec::new(), twiddle: Vec::new(), size, step: 0, div: 0 } + } + fn fft3(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) { + let s0 = src[0]; + let s1 = src[1]; + let s2 = src[2]; + + let t0 = s1 + s2; + let t1 = s0 - t0.scale(0.5); + let t2 = (s2 - s1).rotate().scale(FFT3_CONST); + + dst[FFT15_OUTSWAP[n * 4 + 0] * step] = s0 + t0; + dst[FFT15_OUTSWAP[n * 4 + 1] * step] = t1 + t2; + dst[FFT15_OUTSWAP[n * 4 + 2] * step] = t1 - t2; + } + fn ifft3(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) { + let s0 = src[0]; + let s1 = src[1]; + let s2 = src[2]; + + let t0 = s1 + s2; + let t1 = s0 - t0.scale(0.5); + let t2 = (s2 - s1).rotate().scale(FFT3_CONST); + + dst[FFT15_OUTSWAP[n * 4 + 0] * step] = s0 + t0; + dst[FFT15_OUTSWAP[n * 4 + 1] * step] = t1 - t2; + dst[FFT15_OUTSWAP[n * 4 + 2] * step] = t1 + t2; + } + fn fft5(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) { + let s0 = src[FFT15_INSWAP[n + 0 * 4] * step]; + let s1 = src[FFT15_INSWAP[n + 1 * 4] * step]; + let s2 = src[FFT15_INSWAP[n + 2 * 4] * step]; + let s3 = src[FFT15_INSWAP[n + 3 * 4] * step]; + let s4 = src[FFT15_INSWAP[n + 4 * 4] * step]; + + let t0 = s1 + s4; + let t1 = s1 - s4; + let t2 = s2 + s3; + let t3 = s2 - s3; + let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2); + let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1); + let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1); + let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2); + + dst[0 * 3] = s0 + t0 + t2; + dst[1 * 3] = s0 + t5 - t6; + dst[2 * 3] = s0 - t8 + ta; + dst[3 * 3] = s0 - t9 + tb; + dst[4 * 3] = s0 + t4 - t7; + } + fn ifft5(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) { + let s0 = src[FFT15_INSWAP[n + 0 * 4] * step]; + let s1 = src[FFT15_INSWAP[n + 1 * 4] * step]; + let s2 = src[FFT15_INSWAP[n + 2 * 4] * step]; + let s3 = src[FFT15_INSWAP[n + 3 * 4] * step]; + let s4 = src[FFT15_INSWAP[n + 4 * 4] * step]; + + let t0 = s1 + s4; + let t1 = s1 - s4; + let t2 = s2 + s3; + let t3 = s2 - s3; + let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2); + let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1); + let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1); + let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2); + + dst[0 * 3] = s0 + t0 + t2; + dst[1 * 3] = s0 + t4 - t7; + dst[2 * 3] = s0 - t9 + tb; + dst[3 * 3] = s0 - t8 + ta; + dst[4 * 3] = s0 + t5 - t6; + } + fn fft(_fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) { + let mut tmp = [FFTC_ZERO; 15]; + for n in 0..3 { + Self::fft5(&mut tmp[n..], data, step, n); + } + for n in 0..5 { + Self::fft3(data, &tmp[n * 3..][..3], step, n); + } + } + fn ifft(_fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) { + let mut tmp = [FFTC_ZERO; 15]; + for n in 0..3 { + Self::ifft5(&mut tmp[n..], data, step, n); 
+ } + for n in 0..5 { + Self::ifft3(data, &tmp[n * 3..][..3], step, n); + } + } +} + + +enum FFTMode { + Generic(usize), + SplitRadix(u8), + Prime15, +} + +impl FFTMode { + fn permute(&self, perms: &mut [usize]) { + match *self { + FFTMode::Generic(_) => {}, + FFTMode::SplitRadix(bits) => { + let div = perms.len() >> bits; + gen_sr_perms(perms, 1 << bits); + if div > 1 { + for i in 0..(1 << bits) { + perms[i] *= div; + } + for i in 1..div { + for j in 0..(1 << bits) { + perms[(i << bits) + j] = perms[j] + i; + } + } + } + }, + FFTMode::Prime15 => {}, + }; + } + fn do_fft(&self, fftdata: &mut FFTData, data: &mut [FFTComplex]) { + match *self { + FFTMode::Generic(size) => FFTGeneric::fft(fftdata, size, data, 1), + FFTMode::SplitRadix(bits) => FFTSplitRadix::fft(fftdata, bits, data), + FFTMode::Prime15 => FFT15::fft(fftdata, data, 1), + }; + } + fn do_fft2(&self, fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) { + match *self { + FFTMode::Generic(size) => FFTGeneric::fft(fftdata, size, data, step), + FFTMode::SplitRadix(_) => unreachable!(), + FFTMode::Prime15 => FFT15::fft(fftdata, data, step), + }; + } + fn do_ifft(&self, fftdata: &mut FFTData, data: &mut [FFTComplex]) { + match *self { + FFTMode::Generic(size) => FFTGeneric::ifft(fftdata, size, data, 1), + FFTMode::SplitRadix(bits) => FFTSplitRadix::ifft(fftdata, bits, data), + FFTMode::Prime15 => FFT15::ifft(fftdata, data, 1), + }; + } + fn do_ifft2(&self, fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) { + match *self { + FFTMode::Generic(size) => FFTGeneric::ifft(fftdata, size, data, step), + FFTMode::SplitRadix(_) => unreachable!(), + FFTMode::Prime15 => FFT15::ifft(fftdata, data, step), + }; + } + fn get_size(&self) -> usize { + match *self { + FFTMode::Generic(size) => size, + FFTMode::SplitRadix(bits) => 1 << bits, + FFTMode::Prime15 => 15, + } + } +} + +/// FFT working context. +pub struct FFT { + perms: Vec<usize>, + swaps: Vec<usize>, + ffts: Vec<(FFTMode, FFTData)>, +} + +impl FFT { + /// Calculates Fourier transform. + pub fn do_fft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex]) { + for k in 0..src.len() { dst[k] = src[self.perms[k]]; } + self.do_fft_core(dst); + } + /// Calculates inverse Fourier transform. + pub fn do_ifft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex]) { + for k in 0..src.len() { dst[k] = src[self.perms[k]]; } + self.do_ifft_core(dst); + } + /// Performs inplace FFT. + pub fn do_fft_inplace(&mut self, data: &mut [FFTComplex]) { + for idx in 0..self.swaps.len() { + let nidx = self.swaps[idx]; + if idx != nidx { + data.swap(nidx, idx); + } + } + self.do_fft_core(data); + } + /// Performs inplace inverse FFT. 
+ pub fn do_ifft_inplace(&mut self, data: &mut [FFTComplex]) { + for idx in 0..self.swaps.len() { + let nidx = self.swaps[idx]; + if idx != nidx { + data.swap(nidx, idx); + } + } + self.do_ifft_core(data); + } + fn do_fft_core(&mut self, data: &mut [FFTComplex]) { + for el in self.ffts.iter_mut() { + let (mode, ref mut fftdata) = el; + let bsize = mode.get_size(); + let div = fftdata.div; + let step = fftdata.step; + if step == 1 { + mode.do_fft(fftdata, data); + for i in 1..div { + mode.do_fft(fftdata, &mut data[i * bsize..]); + } + } else { + mode.do_fft2(fftdata, data, div); + let mut toff = bsize; + for i in 1..div { + for j in 1..bsize { + data[i + j * div] *= fftdata.twiddle[toff + j]; + } + mode.do_fft2(fftdata, &mut data[i..], div); + toff += bsize; + } + } + } + } + fn do_ifft_core(&mut self, data: &mut [FFTComplex]) { + for el in self.ffts.iter_mut() { + let (mode, ref mut fftdata) = el; + let bsize = mode.get_size(); + let div = fftdata.div; + let step = fftdata.step; + if step == 1 { + mode.do_ifft(fftdata, data); + for i in 1..div { + mode.do_ifft(fftdata, &mut data[i * bsize..]); + } + } else { + mode.do_ifft2(fftdata, data, div); + let mut toff = bsize; + for i in 1..div { + for j in 1..bsize { + data[i + j * div] *= fftdata.twiddle[toff + j]; + } + mode.do_ifft2(fftdata, &mut data[i..], div); + toff += bsize; + } + } + } + } +} + +/// [`FFT`] context creator. +/// +/// [`FFT`]: ./struct.FFT.html +pub struct FFTBuilder { +} + +/*fn reverse_bits(inval: u32) -> u32 { + const REV_TAB: [u8; 16] = [ + 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110, + 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111, + ]; + + let mut ret = 0; + let mut val = inval; + for _ in 0..8 { + ret = (ret << 4) | (REV_TAB[(val & 0xF) as usize] as u32); + val = val >> 4; + } + ret +} + +fn swp_idx(idx: usize, bits: u32) -> usize { + let s = reverse_bits(idx as u32) as usize; + s >> (32 - bits) +}*/ + +fn gen_sr_perms(swaps: &mut [usize], size: usize) { + if size <= 4 { return; } + let mut evec: Vec<usize> = Vec::with_capacity(size / 2); + let mut ovec1: Vec<usize> = Vec::with_capacity(size / 4); + let mut ovec2: Vec<usize> = Vec::with_capacity(size / 4); + for k in 0..size/4 { + evec.push (swaps[k * 4 + 0]); + ovec1.push(swaps[k * 4 + 1]); + evec.push (swaps[k * 4 + 2]); + ovec2.push(swaps[k * 4 + 3]); + } + for k in 0..size/2 { swaps[k] = evec[k]; } + for k in 0..size/4 { swaps[k + size/2] = ovec1[k]; } + for k in 0..size/4 { swaps[k + 3*size/4] = ovec2[k]; } + gen_sr_perms(&mut swaps[0..size/2], size/2); + gen_sr_perms(&mut swaps[size/2..3*size/4], size/4); + gen_sr_perms(&mut swaps[3*size/4..], size/4); +} + +fn gen_swaps_for_perm(swaps: &mut Vec<usize>, perms: &[usize]) { + let mut idx_arr: Vec<usize> = Vec::with_capacity(perms.len()); + for i in 0..perms.len() { idx_arr.push(i); } + let mut run_size = 0; + let mut run_pos = 0; + for idx in 0..perms.len() { + if perms[idx] == idx_arr[idx] { + if run_size == 0 { run_pos = idx; } + run_size += 1; + } else { + for i in 0..run_size { + swaps.push(run_pos + i); + } + run_size = 0; + let mut spos = idx + 1; + while idx_arr[spos] != perms[idx] { spos += 1; } + idx_arr[spos] = idx_arr[idx]; + idx_arr[idx] = perms[idx]; + swaps.push(spos); + } + } +} + +impl FFTBuilder { + fn generate_twiddle(data: &mut FFTData, size: usize, cur_size: usize, forward: bool) { + if size == cur_size { return; } + data.twiddle = Vec::with_capacity(size); + let div = size / cur_size; + let base = if forward { -2.0 * consts::PI / (size as f32) } else { 2.0 
* consts::PI / (size as f32) }; + for n in 0..div { + for k in 0..cur_size { + data.twiddle.push(FFTComplex::exp(base * ((k * n) as f32))); + } + } + } + /// Constructs a new `FFT` context. + pub fn new_fft(size: usize, forward: bool) -> FFT { + let mut ffts: Vec<(FFTMode, FFTData)> = Vec::with_capacity(1); + let mut perms: Vec<usize> = Vec::with_capacity(size); + let mut swaps: Vec<usize> = Vec::with_capacity(size); + let mut rem_size = size; + if rem_size.trailing_zeros() > 0 { + let bits = rem_size.trailing_zeros() as u8; + let mut data = FFTSplitRadix::new_data(bits, forward); + Self::generate_twiddle(&mut data, size, 1 << bits, forward); + data.step = 1; + data.div = rem_size >> bits; + ffts.push((FFTMode::SplitRadix(bits), data)); + rem_size >>= bits; + } + if (rem_size % 15) == 0 { + let mut data = FFT15::new_data(size, forward); + Self::generate_twiddle(&mut data, size, 15, forward); + data.step = size / rem_size; + data.div = size / rem_size; + ffts.push((FFTMode::Prime15, data)); + rem_size /= 15; + } + if rem_size > 1 { + let mut data = FFTGeneric::new_data(rem_size, forward); + Self::generate_twiddle(&mut data, size, rem_size, forward); + data.step = size / rem_size; + data.div = size / rem_size; + ffts.push((FFTMode::Generic(rem_size), data)); + } + + for i in 0..size { + perms.push(i); + } + for (mode, _) in ffts.iter().rev() { + mode.permute(&mut perms); + } + gen_swaps_for_perm(&mut swaps, perms.as_slice()); + + FFT { perms, swaps, ffts } + } +} + +/// RDFT working context. +pub struct RDFT { + table: Vec<FFTComplex>, + fft: FFT, + fwd: bool, + size: usize, + fwd_fft: bool, +} + +fn crossadd(a: FFTComplex, b: FFTComplex) -> FFTComplex { + FFTComplex { re: a.re + b.re, im: a.im - b.im } +} + +impl RDFT { + /// Calculates RDFT. + pub fn do_rdft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex]) { + dst.copy_from_slice(src); + self.do_rdft_inplace(dst); + } + /// Calculates inplace RDFT. + pub fn do_rdft_inplace(&mut self, buf: &mut [FFTComplex]) { + if !self.fwd { + for n in 0..self.size/2 { + let in0 = buf[n + 1]; + let in1 = buf[self.size - n - 1]; + + let t0 = crossadd(in0, in1); + let t1 = FFTComplex { re: in1.im + in0.im, im: in1.re - in0.re }; + let tab = self.table[n]; + let t2 = FFTComplex { re: t1.im * tab.im + t1.re * tab.re, im: t1.im * tab.re - t1.re * tab.im }; + + buf[n + 1] = FFTComplex { re: t0.im - t2.im, im: t0.re - t2.re }; // (t0 - t2).conj().rotate() + buf[self.size - n - 1] = (t0 + t2).rotate(); + } + let a = buf[0].re; + let b = buf[0].im; + buf[0].re = a - b; + buf[0].im = a + b; + } + if self.fwd_fft { + self.fft.do_fft_inplace(buf); + } else { + self.fft.do_ifft_inplace(buf); + } + if self.fwd { + for n in 0..self.size/2 { + let in0 = buf[n + 1]; + let in1 = buf[self.size - n - 1]; + + let t0 = crossadd(in0, in1).scale(0.5); + let t1 = FFTComplex { re: in0.im + in1.im, im: in0.re - in1.re }; + let t2 = t1 * self.table[n]; + + buf[n + 1] = crossadd(t0, t2); + buf[self.size - n - 1] = FFTComplex { re: t0.re - t2.re, im: -(t0.im + t2.im) }; + } + let a = buf[0].re; + let b = buf[0].im; + buf[0].re = a + b; + buf[0].im = a - b; + } else { + for n in 0..self.size { + buf[n] = FFTComplex{ re: buf[n].im, im: buf[n].re }; + } + } + } +} + +/// [`RDFT`] context creator. +/// +/// [`RDFT`]: ./struct.FFT.html +pub struct RDFTBuilder { +} + +impl RDFTBuilder { + /// Constructs a new `RDFT` context. 
+ pub fn new_rdft(size: usize, forward: bool, forward_fft: bool) -> RDFT { + let mut table: Vec<FFTComplex> = Vec::with_capacity(size / 4); + let (base, scale) = if forward { (consts::PI / (size as f32), 0.5) } else { (-consts::PI / (size as f32), 1.0) }; + for i in 0..size/2 { + table.push(FFTComplex::exp(base * ((i + 1) as f32)).scale(scale)); + } + let fft = FFTBuilder::new_fft(size, forward_fft); + RDFT { table, fft, size, fwd: forward, fwd_fft: forward_fft } + } +} + + +#[cfg(test)] +mod test { + use super::*; + + fn test_fft(size: usize) { + println!("testing FFT {}", size); + let mut fin: Vec<FFTComplex> = Vec::with_capacity(size); + let mut fout1: Vec<FFTComplex> = Vec::with_capacity(size); + let mut fout2: Vec<FFTComplex> = Vec::with_capacity(size); + fin.resize(size, FFTC_ZERO); + fout1.resize(size, FFTC_ZERO); + fout2.resize(size, FFTC_ZERO); + let mut fft = FFTBuilder::new_fft(size, true); + let mut seed: u32 = 42; + for i in 0..fin.len() { + seed = seed.wrapping_mul(1664525).wrapping_add(1013904223); + let val = (seed >> 16) as i16; + fin[i].re = (val as f32) / 256.0; + seed = seed.wrapping_mul(1664525).wrapping_add(1013904223); + let val = (seed >> 16) as i16; + fin[i].im = (val as f32) / 256.0; + } + fft.do_fft(&fin, &mut fout1); + fout2.copy_from_slice(&fin); + generic_fft(&mut fout2, true); + + for i in 0..fin.len() { + assert!((fout1[i].re - fout2[i].re).abs() < 1.0); + assert!((fout1[i].im - fout2[i].im).abs() < 1.0); + } + let mut ifft = FFTBuilder::new_fft(size, false); + ifft.do_ifft_inplace(&mut fout1); + generic_fft(&mut fout2, false); + + let sc = 1.0 / (size as f32); + for i in 0..fin.len() { + assert!((fin[i].re - fout1[i].re * sc).abs() < 1.0); + assert!((fin[i].im - fout1[i].im * sc).abs() < 1.0); + assert!((fout1[i].re - fout2[i].re).abs() * sc < 1.0); + assert!((fout1[i].im - fout2[i].im).abs() * sc < 1.0); + } + } + + #[test] + fn test_ffts() { + test_fft(3); + test_fft(5); + test_fft(16); + test_fft(15); + test_fft(60); + test_fft(256); + test_fft(240); + } + + #[test] + fn test_rdft() { + let mut fin: [FFTComplex; 128] = [FFTC_ZERO; 128]; + let mut fout1: [FFTComplex; 128] = [FFTC_ZERO; 128]; + let mut rdft = RDFTBuilder::new_rdft(fin.len(), true, true); + let mut seed: u32 = 42; + for i in 0..fin.len() { + seed = seed.wrapping_mul(1664525).wrapping_add(1013904223); + let val = (seed >> 16) as i16; + fin[i].re = (val as f32) / 256.0; + seed = seed.wrapping_mul(1664525).wrapping_add(1013904223); + let val = (seed >> 16) as i16; + fin[i].im = (val as f32) / 256.0; + } + rdft.do_rdft(&fin, &mut fout1); + let mut irdft = RDFTBuilder::new_rdft(fin.len(), false, true); + irdft.do_rdft_inplace(&mut fout1); + + for i in 0..fin.len() { + let tst = fout1[i].scale(0.5/(fout1.len() as f32)); + assert!((tst.re - fin[i].re).abs() < 1.0); + assert!((tst.im - fin[i].im).abs() < 1.0); + } + } +} diff --git a/nihav-codec-support/src/dsp/mdct.rs b/nihav-codec-support/src/dsp/mdct.rs new file mode 100644 index 0000000..e6ed3dc --- /dev/null +++ b/nihav-codec-support/src/dsp/mdct.rs @@ -0,0 +1,81 @@ +//! Modified Discrete Cosine transform functionality. +use std::f32::consts; +use super::fft::*; + +/// IMDCT working context. 
+pub struct IMDCT { + twiddle: Vec<FFTComplex>, + fft: FFT, + size: usize, + z: Vec<FFTComplex>, +} + +/* +fn imdct(src: &[f32], dst: &mut [f32], length: usize) { + for n in 0..length*2 { + dst[n] = 0.0; + for k in 0..length { + dst[n] += src[k] * (consts::PI / (length as f32) * ((n as f32) + 0.5 + ((length/2) as f32)) * ((k as f32) + 0.5)).cos(); + } + } +}*/ + +impl IMDCT { + /// Constructs a new instance of `IMDCT` context. + pub fn new(size: usize, scaledown: bool) -> Self { + let mut twiddle: Vec<FFTComplex> = Vec::with_capacity(size / 4); + let factor = 2.0 * consts::PI / ((8 * size) as f32); + let scale = if scaledown { (1.0 / (size as f32)).sqrt() } else { 1.0 }; + for k in 0..size/4 { + twiddle.push(FFTComplex::exp(factor * ((8 * k + 1) as f32)).scale(scale)); + } + let fft = FFTBuilder::new_fft(size/4, false); + let mut z: Vec<FFTComplex> = Vec::with_capacity(size / 2); + z.resize(size / 2, FFTC_ZERO); + IMDCT { twiddle, fft, size, z } + } + /// Calculates IMDCT. + pub fn imdct(&mut self, src: &[f32], dst: &mut [f32]) { + let size2 = self.size / 2; + let size4 = self.size / 4; + let size8 = self.size / 8; + for k in 0..size4 { + let c = FFTComplex { re: src[size2 - 2 * k - 1], im: src[ 2 * k] }; + self.z[k] = c * self.twiddle[k]; + } + self.fft.do_ifft_inplace(&mut self.z); + for k in 0..size4 { + self.z[k] *= self.twiddle[k]; + } + for n in 0..size8 { + dst[ 2 * n] = -self.z[size8 + n] .im; + dst[ 2 * n + 1] = self.z[size8 - n - 1].re; + dst[ size4 + 2 * n] = -self.z[ n] .re; + dst[ size4 + 2 * n + 1] = self.z[size4 - n - 1].im; + dst[ size2 + 2 * n] = -self.z[size8 + n] .re; + dst[ size2 + 2 * n + 1] = self.z[size8 - n - 1].im; + dst[3 * size4 + 2 * n] = self.z[ n] .im; + dst[3 * size4 + 2 * n + 1] = -self.z[size4 - n - 1].re; + } + } + /// Calculates only non-mirrored part of IMDCT. + pub fn imdct_half(&mut self, src: &[f32], dst: &mut [f32]) { + let size2 = self.size / 2; + let size4 = self.size / 4; + let size8 = self.size / 8; + for k in 0..size4 { + let c = FFTComplex { re: src[size2 - 2 * k - 1], im: src[ 2 * k] }; + self.z[k] = c * self.twiddle[k]; + } + self.fft.do_ifft_inplace(&mut self.z); + for k in 0..size4 { + self.z[k] *= self.twiddle[k]; + } + for n in 0..size8 { + dst[ 2 * n] = -self.z[ n] .re; + dst[ 2 * n + 1] = self.z[size4 - n - 1].im; + dst[size4 + 2 * n] = -self.z[size8 + n] .re; + dst[size4 + 2 * n + 1] = self.z[size8 - n - 1].im; + } + } +} diff --git a/nihav-codec-support/src/dsp/mod.rs b/nihav-codec-support/src/dsp/mod.rs new file mode 100644 index 0000000..2ff322d --- /dev/null +++ b/nihav-codec-support/src/dsp/mod.rs @@ -0,0 +1,11 @@ +//! DSP routines. +#[cfg(feature="dct")] +#[allow(clippy::erasing_op)] +pub mod dct; +#[cfg(feature="fft")] +#[allow(clippy::erasing_op)] +pub mod fft; +#[cfg(feature="mdct")] +pub mod mdct; +#[cfg(feature="dsp_window")] +pub mod window; diff --git a/nihav-codec-support/src/dsp/window.rs b/nihav-codec-support/src/dsp/window.rs new file mode 100644 index 0000000..eb350e3 --- /dev/null +++ b/nihav-codec-support/src/dsp/window.rs @@ -0,0 +1,59 @@ +//! Window generating functions. +use std::f32::consts; + +/// Known window types. +#[derive(Debug,Clone,Copy,PartialEq)] +pub enum WindowType { + /// Simple square window. + Square, + /// Simple sine window. + Sine, + /// Kaiser-Bessel derived window. + KaiserBessel(f32), +} + +/// Calculates window coefficients for the requested window type and size. +/// +/// Set `half` flag to calculate only the first half of the window. 
+pub fn generate_window(mode: WindowType, scale: f32, size: usize, half: bool, dst: &mut [f32]) { + match mode { + WindowType::Square => { + for n in 0..size { dst[n] = scale; } + }, + WindowType::Sine => { + let param = if half { + consts::PI / ((2 * size) as f32) + } else { + consts::PI / (size as f32) + }; + for n in 0..size { + dst[n] = (((n as f32) + 0.5) * param).sin() * scale; + } + }, + WindowType::KaiserBessel(alpha) => { + let dlen = if half { size as f32 } else { (size as f32) * 0.5 }; + let alpha2 = f64::from((alpha * consts::PI / dlen) * (alpha * consts::PI / dlen)); + + let mut kb: Vec<f64> = Vec::with_capacity(size); + let mut sum = 0.0; + for n in 0..size { + let b = bessel_i0(((n * (size - n)) as f64) * alpha2); + sum += b; + kb.push(sum); + } + sum += 1.0; + for n in 0..size { + dst[n] = (kb[n] / sum).sqrt() as f32; + } + }, + }; +} + +fn bessel_i0(inval: f64) -> f64 { + let mut val: f64 = 1.0; + for n in (1..64).rev() { + val *= inval / f64::from(n * n); + val += 1.0; + } + val +} |
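
The sketches below are illustrative usage examples for the APIs added by this commit; they are not part of the commit itself. They assume the `dsp` module is exported from the crate root and that the corresponding Cargo features (`dct`, `fft`, `mdct`, `dsp_window`) are enabled, as gated in `mod.rs` above.

First, a DCT-II/DCT-III round trip mirroring the crate's own `test_pair()`: the inverse context's `get_scale()` supplies the factor needed to recover the input.

```rust
// Illustrative only: assumes the `dct` feature is enabled and `dsp` is public.
use nihav_codec_support::dsp::dct::{DCT, DCTMode};

fn dct_roundtrip(input: &[f32]) -> Vec<f32> {
    let mut coeffs = vec![0.0f32; input.len()];
    // forward DCT-II
    let mut fwd = DCT::new(DCTMode::DCT_II, input.len());
    fwd.do_dct(input, &mut coeffs);
    // inverse via DCT-III, performed in place
    let mut inv = DCT::new(DCTMode::DCT_III, input.len());
    inv.do_dct_inplace(&mut coeffs);
    // apply the normalisation reported by the inverse context
    let scale = inv.get_scale();
    coeffs.iter().map(|&c| c * scale).collect()
}
```

Power-of-two sizes take the fast in-place path; other sizes fall back to the reference transforms.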
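A forward/inverse pass with `FFTBuilder` from `fft.rs`. The inverse transform is unnormalised (the crate's test divides by the length), so the caller rescales.

```rust
// Illustrative only: assumes the `fft` feature is enabled.
use nihav_codec_support::dsp::fft::{FFTBuilder, FFTComplex, FFTC_ZERO};

fn fft_roundtrip(src: &[FFTComplex]) -> Vec<FFTComplex> {
    let mut freq = vec![FFTC_ZERO; src.len()];
    let mut fft = FFTBuilder::new_fft(src.len(), true);    // forward context
    fft.do_fft(src, &mut freq);

    let mut ifft = FFTBuilder::new_fft(src.len(), false);  // inverse context
    ifft.do_ifft_inplace(&mut freq);
    let norm = 1.0 / (src.len() as f32);                    // inverse is unnormalised
    freq.iter().map(|&c| c.scale(norm)).collect()
}
```

Sizes are decomposed into split-radix, radix-15 and generic stages, so arbitrary lengths are accepted.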
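The real-DFT wrapper follows the same pattern; the `0.5 / len` factor is the normalisation used by `test_rdft()` in this commit.

```rust
// Illustrative only: forward and inverse RDFT, both built on a forward FFT,
// as in the crate's test_rdft().
use nihav_codec_support::dsp::fft::{RDFTBuilder, FFTComplex, FFTC_ZERO};

fn rdft_roundtrip(src: &[FFTComplex]) -> Vec<FFTComplex> {
    let mut buf = vec![FFTC_ZERO; src.len()];
    let mut rdft = RDFTBuilder::new_rdft(src.len(), true, true);
    rdft.do_rdft(src, &mut buf);

    let mut irdft = RDFTBuilder::new_rdft(src.len(), false, true);
    irdft.do_rdft_inplace(&mut buf);

    let norm = 0.5 / (src.len() as f32);   // scaling used by the unit test
    buf.iter().map(|&c| c.scale(norm)).collect()
}
```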
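For the IMDCT in `mdct.rs`, the constructor takes the full transform length: `imdct()` consumes `size/2` spectral coefficients and writes `size` output samples, while `imdct_half()` produces only the non-mirrored first `size/2`.

```rust
// Illustrative only: assumes the `mdct` feature is enabled.
use nihav_codec_support::dsp::mdct::IMDCT;

fn imdct_block(coeffs: &[f32]) -> Vec<f32> {
    let size = coeffs.len() * 2;              // full transform length
    let mut imdct = IMDCT::new(size, false);  // second argument requests the optional scaledown
    let mut samples = vec![0.0f32; size];
    imdct.imdct(coeffs, &mut samples);
    samples
}
```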
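Finally, window generation from `window.rs` fills a caller-provided buffer; the `half` flag requests only the first half of the window. The alpha value below is an arbitrary example, not something taken from this commit.

```rust
// Illustrative only: assumes the `dsp_window` feature is enabled.
use nihav_codec_support::dsp::window::{generate_window, WindowType};

fn make_windows(len: usize) -> (Vec<f32>, Vec<f32>) {
    let mut sine = vec![0.0f32; len];
    generate_window(WindowType::Sine, 1.0, len, false, &mut sine);

    let mut kbd = vec![0.0f32; len];
    // Kaiser-Bessel derived window with alpha = 4.0 (example value)
    generate_window(WindowType::KaiserBessel(4.0), 1.0, len, false, &mut kbd);
    (sine, kbd)
}
```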