SDRAngel  4.11.5
Developer docs for <a href="https://github.com/f4exb/sdrangel">SDRangel</a>, an Open Source Qt5 / OpenGL 3.0+ SDR and signal analyzer frontend to various hardware.
inthalfbandfiltereo1i.h
Go to the documentation of this file.
1 // Copyright (C) 2016 F4EXB //
3 // written by Edouard Griffiths //
4 // //
5 // Integer half-band FIR based interpolator and decimator //
6 // This is the even/odd double buffer variant. Really useful only when SIMD is //
7 // used //
8 // //
9 // This program is free software; you can redistribute it and/or modify //
10 // it under the terms of the GNU General Public License as published by //
11 // the Free Software Foundation as version 3 of the License, or //
12 // (at your option) any later version. //
13 // //
14 // This program is distributed in the hope that it will be useful, //
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of //
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
17 // GNU General Public License V3 for more details. //
18 // //
19 // You should have received a copy of the GNU General Public License //
20 // along with this program. If not, see <http://www.gnu.org/licenses/>. //
22 
23 #ifndef SDRBASE_DSP_INTHALFBANDFILTEREO1I_H_
24 #define SDRBASE_DSP_INTHALFBANDFILTEREO1I_H_
25 
26 #include <stdint.h>
27 
28 #if defined(USE_SSE4_1)
29 #include <smmintrin.h>
30 #endif
31 
32 #include "hbfiltertraits.h"
33 
34 template<uint32_t HBFilterOrder>
36 {
37 public:
// Computes two 32-bit sums, written to iAcc and qAcc, using SSE4.1 intrinsics.
//
// Depending on the parity of ptr, samples are read either from `even`
// (ptr even) or from `odd` (ptr odd). Row 0 of the selected buffer feeds
// iAcc and row 1 feeds qAcc. For every coefficient, one sample taken from
// the "tip" side (index walking downwards) and one taken from the "tail"
// side (index walking upwards) are added together and the sum is multiplied
// by that coefficient -- presumably relying on the symmetry of the half-band
// filter taps; confirm against HBFIRFilterTraits.
//
// Parameters:
//   ptr   - position value; ptr/2 is the base index into the buffers and
//           ptr%2 selects between `even` and `odd`.
//   even  - two rows of HBFilterOrder 32-bit samples, used when ptr is even.
//   odd   - two rows of HBFilterOrder 32-bit samples, used when ptr is odd.
//   iAcc  - output: overwritten (not accumulated into) with the row 0 sum.
//   qAcc  - output: overwritten (not accumulated into) with the row 1 sum.
//
// NOTE: the whole body is guarded by USE_SSE4_1. When that macro is not
// defined the function does nothing and iAcc / qAcc are left untouched.
// NOTE(review): no bounds checks are done on a and b here; the caller is
// presumably responsible for sizing the buffers so that all accesses are
// in range -- verify against the calling code.
38  static void work(
39  int ptr,
40  int32_t even[2][HBFilterOrder],
41  int32_t odd[2][HBFilterOrder],
42  int32_t& iAcc, int32_t& qAcc)
43  {
44 #if defined(USE_SSE4_1)
45  int a = ptr/2 + HBFIRFilterTraits<HBFilterOrder>::hbOrder/2; // tip pointer
46  int b = ptr/2 + 1; // tail pointer
// Coefficients are consumed four at a time as 128-bit vectors.
// NOTE(review): *h below is a plain dereference of a __m128i pointer, which
// presumably requires hbCoeffs to be 16-byte aligned and to hold 32-bit
// integers -- confirm in hbfiltertraits.h.
47  const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
48  __m128i sumI = _mm_setzero_si128();
49  __m128i sumQ = _mm_setzero_si128();
50  __m128i sa, sb;
// Move the tip index back by 3 so that a 4-element load starting at a
// covers the elements a..a+3, the last of which is the original tip.
51  a -= 3;
52 
// Each iteration handles four coefficients (one __m128i), hence the loop
// runs hbOrder/16 times.
53  for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
54  {
55  if ((ptr % 2) == 0)
56  {
// Tip side: unaligned load of four samples, then _MM_SHUFFLE(0,1,2,3)
// reverses the lane order so that lane 0 holds the element at a+3.
// Tail side: unaligned load of four samples in natural order.
// The two vectors are added lane-wise, multiplied by the coefficients
// (low 32 bits of each product kept) and accumulated.
57  sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(even[0][a])), _MM_SHUFFLE(0,1,2,3));
58  sb = _mm_loadu_si128((__m128i*) &(even[0][b]));
59  sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
60 
// Same computation on row 1, accumulated into sumQ.
61  sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(even[1][a])), _MM_SHUFFLE(0,1,2,3));
62  sb = _mm_loadu_si128((__m128i*) &(even[1][b]));
63  sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
64  }
65  else
66  {
// Odd ptr: identical processing, but reading from the `odd` buffers.
67  sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(odd[0][a])), _MM_SHUFFLE(0,1,2,3));
68  sb = _mm_loadu_si128((__m128i*) &(odd[0][b]));
69  sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
70 
71  sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(odd[1][a])), _MM_SHUFFLE(0,1,2,3));
72  sb = _mm_loadu_si128((__m128i*) &(odd[1][b]));
73  sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
74  }
75 
// Step to the next group of four: tip moves down, tail moves up, and the
// coefficient pointer advances by one 128-bit vector.
76  a -= 4;
77  b += 4;
78  ++h;
79  }
80 
81  // horizontal add of four 32 bit partial sums
82 
// Shifting right by 8 bytes and adding folds lanes 2,3 onto lanes 0,1;
// shifting by 4 bytes and adding then folds lane 1 onto lane 0, so lane 0
// holds the sum of all four lanes. _mm_cvtsi128_si32 extracts lane 0.
83  sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8));
84  sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4));
85  iAcc = _mm_cvtsi128_si32(sumI);
86 
87  sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
88  sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
89  qAcc = _mm_cvtsi128_si32(sumQ);
90 #endif
91  }
92 };
93 
94 #endif /* SDRBASE_DSP_INTHALFBANDFILTEREO1I_H_ */
static void work(int ptr, int32_t even[2][HBFilterOrder], int32_t odd[2][HBFilterOrder], int32_t &iAcc, int32_t &qAcc)
int32_t i
Definition: decimators.h:244
int int32_t
Definition: rtptypes_win.h:45