// Normalize every coefficient by b0 before storing it.
// NOTE(review): no guard against coeffs.b0 == 0 is visible here — confirm the
// caller guarantees a non-zero b0.
48 const auto a0 = coeffs.a0 / coeffs.b0;
49 const auto a1 = coeffs.a1 / coeffs.b0;
50 const auto a2 = coeffs.a2 / coeffs.b0;
51 const auto b1 = coeffs.b1 / coeffs.b0;
52 const auto b2 = coeffs.b2 / coeffs.b0;
// Splat each normalized scalar across a full SIMD batch.
54 const auto a0Batch = xsimd::broadcast(a0);
55 const auto a1Batch = xsimd::broadcast(a1);
56 const auto a2Batch = xsimd::broadcast(a2);
57 const auto b1Batch = xsimd::broadcast(b1);
58 const auto b2Batch = xsimd::broadcast(b2);
// Vector part: fill indices [0, m_vecSize) of every coefficient array,
// m_simdSize elements per aligned store.
59 for (
size_t i = 0; i < m_vecSize; i += m_simdSize) {
60 a0Batch.store_aligned(&m_a0[i]);
61 a1Batch.store_aligned(&m_a1[i]);
62 a2Batch.store_aligned(&m_a2[i]);
63 b1Batch.store_aligned(&m_b1[i]);
64 b2Batch.store_aligned(&m_b2[i]);
// Scalar part: walk the remaining indices [m_vecSize, N) one at a time.
66 for (
size_t i = m_vecSize; i < N; ++i) {
/// @brief Processes one sample per index, in place.
///
/// The input is staged in m_working, each index i is evaluated as
/// a0*x0 + a1*x1 + a2*x2 - b1*y1 - b2*y2 using the stored coefficients and
/// history arrays, the history is shifted, and m_y1 is copied back into x.
///
/// @param x Fixed-extent span of N samples; overwritten with the contents of
///          m_y1 on return.
94 auto operator()(std::span<SampleType, N> x)
noexcept ->
void {
// Size in bytes of the whole N-sample span, used by both memcpy calls.
95 constexpr static auto sizeBytes =
sizeof(SampleType) * N;
// Stage the input in m_working; the loop below reads it with aligned loads.
96 std::memcpy(m_working.data(), x.data(), sizeBytes);
// Vector part: indices [0, m_vecSize), m_simdSize elements per iteration.
97 for (
size_t i = 0; i < m_vecSize; i += m_simdSize) {
// Feed-forward terms: current input (x0) and the two previous inputs.
98 const auto& a0 = xsimd::load_aligned(&m_a0[i]);
99 const auto& x0 = xsimd::load_aligned(&m_working[i]);
100 const auto a0x0 = a0 * x0;
101 const auto& a1 = xsimd::load_aligned(&m_a1[i]);
102 const auto& x1 = xsimd::load_aligned(&m_x1[i]);
103 const auto a1x1 = a1 * x1;
104 const auto& a2 = xsimd::load_aligned(&m_a2[i]);
105 const auto& x2 = xsimd::load_aligned(&m_x2[i]);
106 const auto a2x2 = a2 * x2;
// Feedback terms: the two previous outputs.
107 const auto& b1 = xsimd::load_aligned(&m_b1[i]);
108 const auto& y1 = xsimd::load_aligned(&m_y1[i]);
109 const auto b1y1 = b1 * y1;
110 const auto& b2 = xsimd::load_aligned(&m_b2[i]);
111 const auto& y2 = xsimd::load_aligned(&m_y2[i]);
112 const auto b2y2 = b2 * y2;
113 const auto res = a0x0 + a1x1 + a2x2 - b1y1 - b2y2;
// Shift the history: x1 -> x2, x0 -> x1, y1 -> y2, res -> y1. x1 and y1 hold
// values loaded before any of these stores, so the stores do not disturb them.
114 x1.store_aligned(&m_x2[i]);
115 x0.store_aligned(&m_x1[i]);
116 y1.store_aligned(&m_y2[i]);
117 res.store_aligned(&m_y1[i]);
// Scalar part: the same difference equation for indices [m_vecSize, N),
// one element at a time.
120 for (
size_t i = m_vecSize; i < N; ++i) {
121 const auto res = (m_a0[i] * m_working[i]) + (m_a1[i] * m_x1[i]) + (m_a2[i] * m_x2[i]) - (m_b1[i] * m_y1[i]) - (m_b2[i] * m_y2[i]);
// The current input becomes the most recent previous input.
123 m_x1[i] = m_working[i];
// m_y1 receives the new outputs in the vector loop above; copy it out to x.
127 std::memcpy(x.data(), m_y1.data(), sizeBytes);
// Zero-valued batch used to clear the working buffer and all history arrays.
// NOTE(review): broadcast(0.0) deduces its element type from the double
// literal; if SampleType is not double, confirm the stored lane count still
// matches the m_simdSize stride used below.
134 auto batch = xsimd::broadcast(0.0);
// Vector part: zero indices [0, m_vecSize), m_simdSize elements per store.
135 for (
size_t i = 0; i < m_vecSize; i += m_simdSize) {
136 batch.store_aligned(&m_working[i]);
137 batch.store_aligned(&m_x1[i]);
138 batch.store_aligned(&m_x2[i]);
139 batch.store_aligned(&m_y1[i]);
140 batch.store_aligned(&m_y2[i]);
// Scalar part: walk the remaining indices [m_vecSize, N) one at a time.
142 for (
size_t i = m_vecSize; i < N; ++i) {
// Per-index filter coefficients, already divided by b0. Held in
// aligned-allocator vectors because they are accessed with
// load_aligned/store_aligned.
156 std::vector<SampleType, xsimd::aligned_allocator<SampleType>> m_a0, m_a1, m_a2, m_b1, m_b2;
// Per-index history: the two previous inputs (m_x1 newest, m_x2 older) and
// the two previous outputs (m_y1 newest, m_y2 older).
157 std::vector<SampleType, xsimd::aligned_allocator<SampleType>> m_x1, m_x2, m_y1, m_y2;