SDRAngel  4.11.5
Developer docs for <a href="https://github.com/f4exb/sdrangel">SDRangel</a>, an Open Source Qt5 / OpenGL 3.0+ SDR and signal analyzer frontend to various hardware.
inthalfbandfiltersti.h
Go to the documentation of this file.
1 // Copyright (C) 2016 F4EXB //
3 // written by Edouard Griffiths //
4 // //
5 // Integer half-band FIR based interpolator and decimator //
6 // This is the even/odd and I/Q stride with double buffering variant //
7 // This is the SIMD intrinsics code //
8 // //
9 // This program is free software; you can redistribute it and/or modify //
10 // it under the terms of the GNU General Public License as published by //
11 // the Free Software Foundation as version 3 of the License, or //
12 // (at your option) any later version. //
13 // //
14 // This program is distributed in the hope that it will be useful, //
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of //
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
17 // GNU General Public License V3 for more details. //
18 // //
19 // You should have received a copy of the GNU General Public License //
20 // along with this program. If not, see <http://www.gnu.org/licenses/>. //
22 
23 #ifndef SDRBASE_DSP_INTHALFBANDFILTERSTI_H_
24 #define SDRBASE_DSP_INTHALFBANDFILTERSTI_H_
25 
26 #include <stdint.h>
27 
28 #if defined(USE_SSE4_1)
29 #include <smmintrin.h>
30 #endif
31 
32 #include "hbfiltertraits.h"
33 
34 template<uint32_t HBFilterOrder>
36 {
37 public:
38  static void work(
39  int32_t samples[HBFilterOrder][2],
40  int32_t& iEvenAcc, int32_t& qEvenAcc,
41  int32_t& iOddAcc, int32_t& qOddAcc)
42  {
43 #if defined(USE_SSE4_1)
45  int b = 0; // tail
46  const int *h = (const int*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
47  __m128i sum = _mm_setzero_si128();
48  __m128i shh, sa, sb;
49  int32_t sums[4] __attribute__ ((aligned (16)));
50 
51  for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
52  {
53  shh = _mm_set_epi32(h[4*i], h[4*i], h[4*i], h[4*i]);
54  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
55  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
56  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
57  a -= 2;
58  b += 2;
59  shh = _mm_set_epi32(h[4*i+1], h[4*i+1], h[4*i+1], h[4*i+1]);
60  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
61  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
62  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
63  a -= 2;
64  b += 2;
65  shh = _mm_set_epi32(h[4*i+2], h[4*i+2], h[4*i+2], h[4*i+2]);
66  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
67  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
68  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
69  a -= 2;
70  b += 2;
71  shh = _mm_set_epi32(h[4*i+3], h[4*i+3], h[4*i+3], h[4*i+3]);
72  sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
73  sb = _mm_load_si128((__m128i*) &(samples[b][0]));
74  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
75  a -= 2;
76  b += 2;
77  }
78 
79  // Extract values from sum vector
80  _mm_store_si128((__m128i*) sums, sum);
81  iEvenAcc = sums[0];
82  qEvenAcc = sums[1];
83  iOddAcc = sums[2];
84  qOddAcc = sums[3];
85 #endif
86  }
87 
88  // not aligned version
89  static void workNA(
90  int ptr,
91  int32_t samples[HBFilterOrder*2][2],
92  int32_t& iEvenAcc, int32_t& qEvenAcc,
93  int32_t& iOddAcc, int32_t& qOddAcc)
94  {
95 #if defined(USE_SSE4_1)
96  int a = ptr + HBFIRFilterTraits<HBFilterOrder>::hbOrder - 2; // tip
97  int b = ptr + 0; // tail
98  const int *h = (const int*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
99  __m128i sum = _mm_setzero_si128();
100  __m128i shh, sa, sb;
101  int32_t sums[4] __attribute__ ((aligned (16)));
102 
103  for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
104  {
105  shh = _mm_set_epi32(h[4*i], h[4*i], h[4*i], h[4*i]);
106  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
107  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
108  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
109  a -= 2;
110  b += 2;
111  shh = _mm_set_epi32(h[4*i+1], h[4*i+1], h[4*i+1], h[4*i+1]);
112  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
113  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
114  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
115  a -= 2;
116  b += 2;
117  shh = _mm_set_epi32(h[4*i+2], h[4*i+2], h[4*i+2], h[4*i+2]);
118  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
119  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
120  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
121  a -= 2;
122  b += 2;
123  shh = _mm_set_epi32(h[4*i+3], h[4*i+3], h[4*i+3], h[4*i+3]);
124  sa = _mm_loadu_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq
125  sb = _mm_loadu_si128((__m128i*) &(samples[b][0]));
126  sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh));
127  a -= 2;
128  b += 2;
129  }
130 
131  // Extract values from sum vector
132  _mm_store_si128((__m128i*) sums, sum);
133  iEvenAcc = sums[0];
134  qEvenAcc = sums[1];
135  iOddAcc = sums[2];
136  qOddAcc = sums[3];
137 #endif
138  }
139 };
140 
141 
142 
143 #endif /* SDRBASE_DSP_INTHALFBANDFILTERSTI_H_ */
int32_t i
Definition: decimators.h:244
int int32_t
Definition: rtptypes_win.h:45
typedef __attribute__
static void workNA(int ptr, int32_t samples[HBFilterOrder *2][2], int32_t &iEvenAcc, int32_t &qEvenAcc, int32_t &iOddAcc, int32_t &qOddAcc)
static void work(int32_t samples[HBFilterOrder][2], int32_t &iEvenAcc, int32_t &qEvenAcc, int32_t &iOddAcc, int32_t &qOddAcc)