SDRAngel  4.11.5
Developer docs for <a href="https://github.com/f4exb/sdrangel">SDRangel</a>, an Open Source Qt5 / OpenGL 3.0+ SDR and signal analyzer frontend to various hardware.
Static Public Member Functions | List of all members
IntHalfbandFilterSTIntrinsics< HBFilterOrder > Class Template Reference

#include <inthalfbandfiltersti.h>

Static Public Member Functions

static void work (int32_t samples[HBFilterOrder][2], int32_t &iEvenAcc, int32_t &qEvenAcc, int32_t &iOddAcc, int32_t &qOddAcc)
 
static void workNA (int ptr, int32_t samples[HBFilterOrder *2][2], int32_t &iEvenAcc, int32_t &qEvenAcc, int32_t &iOddAcc, int32_t &qOddAcc)
 

Detailed Description

template<uint32_t HBFilterOrder>
class IntHalfbandFilterSTIntrinsics< HBFilterOrder >

Definition at line 35 of file inthalfbandfiltersti.h.

Member Function Documentation

◆ work()

template<uint32_t HBFilterOrder>
static void IntHalfbandFilterSTIntrinsics< HBFilterOrder >::work ( int32_t  samples[HBFilterOrder][2],
int32_t &iEvenAcc,
int32_t &qEvenAcc,
int32_t &iOddAcc,
int32_t &qOddAcc
)
inline static

Definition at line 38 of file inthalfbandfiltersti.h.

References __attribute__, and i.

42  {
43 #if defined(USE_SSE4_1)
45  int b = 0; // tail
46  const int *h = (const int*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
47  __m128i sum = _mm_setzero_si128();
48  __m128i shh, sa, sb;
49  int32_t sums[4] __attribute__ ((aligned (16)));
50 
51  for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
52  {
53  shh = _mm_set_epi32(h[4*i], h[4*i], h[4*i], h[4*i]);
54  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
55  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
56  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
57  a -= 2;
58  b += 2;
59  shh = _mm_set_epi32(h[4*i+1], h[4*i+1], h[4*i+1], h[4*i+1]);
60  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
61  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
62  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
63  a -= 2;
64  b += 2;
65  shh = _mm_set_epi32(h[4*i+2], h[4*i+2], h[4*i+2], h[4*i+2]);
66  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
67  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
68  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
69  a -= 2;
70  b += 2;
71  shh = _mm_set_epi32(h[4*i+3], h[4*i+3], h[4*i+3], h[4*i+3]);
72  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
73  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
74  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
75  a -= 2;
76  b += 2;
77  }
78 
79  // Extract values from sum vector
80  _mm_store_si128((__m128i*) sums, sum);
81  iEvenAcc = sums[0];
82  qEvenAcc = sums[1];
83  iOddAcc = sums[2];
84  qOddAcc = sums[3];
85 #endif
86  }
int32_t i
Definition: decimators.h:244
int int32_t
Definition: rtptypes_win.h:45
typedef __attribute__

◆ workNA()

template<uint32_t HBFilterOrder>
static void IntHalfbandFilterSTIntrinsics< HBFilterOrder >::workNA ( int  ptr,
int32_t  samples[HBFilterOrder *2][2],
int32_t &iEvenAcc,
int32_t &qEvenAcc,
int32_t &iOddAcc,
int32_t &qOddAcc
)
inline static

Definition at line 89 of file inthalfbandfiltersti.h.

References __attribute__, and i.

Referenced by IntHalfbandFilterST< HBFilterOrder >::doFIR().

94  {
95 #if defined(USE_SSE4_1)
96  int a = ptr + HBFIRFilterTraits<HBFilterOrder>::hbOrder - 2; // tip
97  int b = ptr + 0; // tail
98  const int *h = (const int*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
99  __m128i sum = _mm_setzero_si128();
100  __m128i shh, sa, sb;
101  int32_t sums[4] __attribute__ ((aligned (16)));
102 
103  for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
104  {
105  shh = _mm_set_epi32(h[4*i], h[4*i], h[4*i], h[4*i]);
106  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
107  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
108  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
109  a -= 2;
110  b += 2;
111  shh = _mm_set_epi32(h[4*i+1], h[4*i+1], h[4*i+1], h[4*i+1]);
112  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
113  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
114  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
115  a -= 2;
116  b += 2;
117  shh = _mm_set_epi32(h[4*i+2], h[4*i+2], h[4*i+2], h[4*i+2]);
118  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
119  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
120  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
121  a -= 2;
122  b += 2;
123  shh = _mm_set_epi32(h[4*i+3], h[4*i+3], h[4*i+3], h[4*i+3]);
124  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
125  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
126  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
127  a -= 2;
128  b += 2;
129  }
130 
131  // Extract values from sum vector
132  _mm_store_si128((__m128i*) sums, sum);
133  iEvenAcc = sums[0];
134  qEvenAcc = sums[1];
135  iOddAcc = sums[2];
136  qOddAcc = sums[3];
137 #endif
138  }
int32_t i
Definition: decimators.h:244
int int32_t
Definition: rtptypes_win.h:45
typedef __attribute__
+ Here is the caller graph for this function:

The documentation for this class was generated from the following file: