#include <inthalfbandfiltersti.h>
template<uint32_t HBFilterOrder>
class IntHalfbandFilterSTIntrinsics< HBFilterOrder >
Definition at line 35 of file inthalfbandfiltersti.h.
◆ work()
template<uint32_t HBFilterOrder>
Definition at line 38 of file inthalfbandfiltersti.h.
References __attribute__ and i.
43 #if defined(USE_SSE4_1)
47 __m128i sum = _mm_setzero_si128();
51 for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
53 shh = _mm_set_epi32(h[4*i], h[4*i], h[4*i], h[4*i]);
54 sa = _mm_load_si128((__m128i*) &(samples[a][0]));
55 sb = _mm_load_si128((__m128i*) &(samples[b][0]));
56 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
59 shh = _mm_set_epi32(h[4*i+1], h[4*i+1], h[4*i+1], h[4*i+1]);
60 sa = _mm_load_si128((__m128i*) &(samples[a][0]));
61 sb = _mm_load_si128((__m128i*) &(samples[b][0]));
62 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
65 shh = _mm_set_epi32(h[4*i+2], h[4*i+2], h[4*i+2], h[4*i+2]);
66 sa = _mm_load_si128((__m128i*) &(samples[a][0]));
67 sb = _mm_load_si128((__m128i*) &(samples[b][0]));
68 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
71 shh = _mm_set_epi32(h[4*i+3], h[4*i+3], h[4*i+3], h[4*i+3]);
72 sa = _mm_load_si128((__m128i*) &(samples[a][0]));
73 sb = _mm_load_si128((__m128i*) &(samples[b][0]));
74 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
80 _mm_store_si128((__m128i*) sums, sum);
◆ workNA()
template<uint32_t HBFilterOrder>
Definition at line 89 of file inthalfbandfiltersti.h.
References __attribute__ and i.
Referenced by IntHalfbandFilterST< HBFilterOrder >::doFIR().
95 #if defined(USE_SSE4_1)
99 __m128i sum = _mm_setzero_si128();
103 for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
105 shh = _mm_set_epi32(h[4*i], h[4*i], h[4*i], h[4*i]);
106 sa = _mm_loadu_si128((__m128i*) &(samples[a][0]));
107 sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
108 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
111 shh = _mm_set_epi32(h[4*i+1], h[4*i+1], h[4*i+1], h[4*i+1]);
112 sa = _mm_loadu_si128((__m128i*) &(samples[a][0]));
113 sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
114 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
117 shh = _mm_set_epi32(h[4*i+2], h[4*i+2], h[4*i+2], h[4*i+2]);
118 sa = _mm_loadu_si128((__m128i*) &(samples[a][0]));
119 sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
120 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
123 shh = _mm_set_epi32(h[4*i+3], h[4*i+3], h[4*i+3], h[4*i+3]);
124 sa = _mm_loadu_si128((__m128i*) &(samples[a][0]));
125 sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
126 sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
132 _mm_store_si128((__m128i*) sums, sum);
The documentation for this class was generated from the following file: