Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H | ||
11 | #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H | ||
12 | |||
13 | #include <inttypes.h> | ||
14 | #include <stdio.h> | ||
15 | #include <volk/volk_complex.h> | ||
16 | |||
17 | #ifdef LV_HAVE_AVX2 | ||
18 | #include <immintrin.h> | ||
19 | /*! | ||
20 | \brief Multiplies one complex vector by the complex conjugate of a second complex vector and stores the results in a third vector (AVX2, aligned loads and stores) | ||
21 | \param cVector The complex vector where the results will be stored | ||
22 | \param aVector One of the complex vectors to be multiplied | ||
23 | \param bVector The complex vector which will be converted to complex conjugate and multiplied | ||
24 | \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector | ||
25 | \note The vector loop handles 8 complex values per iteration (quarterPoints is num_points / 8 here, despite its name); the remaining values are computed by the scalar loop at the end | ||
26 | */ | ||
27 | 2 | static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(lv_16sc_t* cVector, | |
28 | const lv_8sc_t* aVector, | ||
29 | const lv_8sc_t* bVector, | ||
30 | unsigned int num_points) | ||
31 | { | ||
32 | 2 | unsigned int number = 0; | |
33 | 2 | const unsigned int quarterPoints = num_points / 8; | |
34 | |||
35 | __m256i x, y, realz, imagz; | ||
36 | 2 | lv_16sc_t* c = cVector; | |
37 | 2 | const lv_8sc_t* a = aVector; | |
38 | 2 | const lv_8sc_t* b = bVector; | |
39 | __m256i conjugateSign = | ||
40 | 2 | _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1); | |
41 | |||
42 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < quarterPoints; number++) { |
43 | // Sign-extend 16 8 bit values (8 complex points) into 16 bit values | ||
44 | 65532 | x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a)); | |
45 | 65532 | y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b)); | |
46 | |||
47 | // Real part of a * conj(b): ar*br + ai*bi (madd sums each adjacent pair of products) | ||
48 | 32766 | realz = _mm256_madd_epi16(x, y); | |
49 | |||
50 | // Form conj(b) by negating the imaginary (odd) 16 bit elements of b | ||
51 | 32766 | y = _mm256_sign_epi16(y, conjugateSign); | |
52 | |||
53 | // Swap the real and imaginary elements within each complex value of conj(b) | ||
54 | 32766 | y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)), | |
55 | _MM_SHUFFLE(2, 3, 0, 1)); | ||
56 | |||
57 | // Imaginary part of a * conj(b): ar*(-bi) + ai*br | ||
58 | 32766 | imagz = _mm256_madd_epi16(x, y); | |
59 | |||
60 | // Interleave the real and imaginary 32 bit sums, pack them to 16 bit with signed saturation and store | ||
61 | |||
62 | 98298 | _mm256_store_si256((__m256i*)c, | |
63 | _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz), | ||
64 | _mm256_unpackhi_epi32(realz, imagz))); | ||
65 | |||
66 | 32766 | a += 8; | |
67 | 32766 | b += 8; | |
68 | 32766 | c += 8; | |
69 | } | ||
70 | |||
71 | 2 | number = quarterPoints * 8; | |
72 | 2 | int16_t* c16Ptr = (int16_t*)&cVector[number]; | |
73 | 2 | int8_t* a8Ptr = (int8_t*)&aVector[number]; | |
74 | 2 | int8_t* b8Ptr = (int8_t*)&bVector[number]; | |
75 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
76 | 14 | float aReal = (float)*a8Ptr++; | |
77 | 14 | float aImag = (float)*a8Ptr++; | |
78 | 14 | lv_32fc_t aVal = lv_cmake(aReal, aImag); | |
79 | 14 | float bReal = (float)*b8Ptr++; | |
80 | 14 | float bImag = (float)*b8Ptr++; | |
81 | 14 | lv_32fc_t bVal = lv_cmake(bReal, -bImag); | |
82 | 14 | lv_32fc_t temp = aVal * bVal; | |
83 | |||
84 | 14 | *c16Ptr++ = (int16_t)lv_creal(temp); | |
85 | 14 | *c16Ptr++ = (int16_t)lv_cimag(temp); | |
86 | } | ||
87 | 2 | } | |
88 | #endif /* LV_HAVE_AVX2 */ | ||
89 | |||
90 | |||
91 | #ifdef LV_HAVE_SSE4_1 | ||
92 | #include <smmintrin.h> | ||
93 | /*! | ||
94 | \brief Multiplies one complex vector by the complex conjugate of a second complex vector and stores the results in a third vector (SSE4.1, aligned stores) | ||
95 | \param cVector The complex vector where the results will be stored | ||
96 | \param aVector One of the complex vectors to be multiplied | ||
97 | \param bVector The complex vector which will be converted to complex conjugate and multiplied | ||
98 | \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector | ||
99 | \note The vector loop handles 4 complex values per iteration; the remaining values are computed by the scalar loop at the end | ||
100 | */ | ||
101 | 2 | static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector, | |
102 | const lv_8sc_t* aVector, | ||
103 | const lv_8sc_t* bVector, | ||
104 | unsigned int num_points) | ||
105 | { | ||
106 | 2 | unsigned int number = 0; | |
107 | 2 | const unsigned int quarterPoints = num_points / 4; | |
108 | |||
109 | __m128i x, y, realz, imagz; | ||
110 | 2 | lv_16sc_t* c = cVector; | |
111 | 2 | const lv_8sc_t* a = aVector; | |
112 | 2 | const lv_8sc_t* b = bVector; | |
113 | 2 | __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); | |
114 | |||
115 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
116 | // Load 64 bits (4 complex points) and sign-extend the 8 bit values into 16 bit values | ||
117 | 131068 | x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a)); | |
118 | 131068 | y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b)); | |
119 | |||
120 | // Real part of a * conj(b): ar*br + ai*bi (madd sums each adjacent pair of products) | ||
121 | 65534 | realz = _mm_madd_epi16(x, y); | |
122 | |||
123 | // Form conj(b) by negating the imaginary (odd) 16 bit elements of b | ||
124 | 65534 | y = _mm_sign_epi16(y, conjugateSign); | |
125 | |||
126 | // Swap the real and imaginary elements within each complex value of conj(b) | ||
127 | 65534 | y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)), | |
128 | _MM_SHUFFLE(2, 3, 0, 1)); | ||
129 | |||
130 | // Imaginary part of a * conj(b): ar*(-bi) + ai*br | ||
131 | 65534 | imagz = _mm_madd_epi16(x, y); | |
132 | |||
133 | 196602 | _mm_store_si128((__m128i*)c, | |
134 | _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), | ||
135 | _mm_unpackhi_epi32(realz, imagz))); | ||
136 | |||
137 | 65534 | a += 4; | |
138 | 65534 | b += 4; | |
139 | 65534 | c += 4; | |
140 | } | ||
141 | |||
142 | 2 | number = quarterPoints * 4; | |
143 | 2 | int16_t* c16Ptr = (int16_t*)&cVector[number]; | |
144 | 2 | int8_t* a8Ptr = (int8_t*)&aVector[number]; | |
145 | 2 | int8_t* b8Ptr = (int8_t*)&bVector[number]; | |
146 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
147 | 6 | float aReal = (float)*a8Ptr++; | |
148 | 6 | float aImag = (float)*a8Ptr++; | |
149 | 6 | lv_32fc_t aVal = lv_cmake(aReal, aImag); | |
150 | 6 | float bReal = (float)*b8Ptr++; | |
151 | 6 | float bImag = (float)*b8Ptr++; | |
152 | 6 | lv_32fc_t bVal = lv_cmake(bReal, -bImag); | |
153 | 6 | lv_32fc_t temp = aVal * bVal; | |
154 | |||
155 | 6 | *c16Ptr++ = (int16_t)lv_creal(temp); | |
156 | 6 | *c16Ptr++ = (int16_t)lv_cimag(temp); | |
157 | } | ||
158 | 2 | } | |
159 | #endif /* LV_HAVE_SSE4_1 */ | ||
160 | |||
161 | #ifdef LV_HAVE_GENERIC | ||
162 | /*! | ||
163 | \brief Multiplies one complex vector by the complex conjugate of a second complex vector and stores the results in a third vector (generic scalar implementation) | ||
164 | \param cVector The complex vector where the results will be stored | ||
165 | \param aVector One of the complex vectors to be multiplied | ||
166 | \param bVector The complex vector which will be converted to complex conjugate and multiplied | ||
167 | \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector | ||
168 | \note Each product is formed from lv_32fc_t values built with lv_cmake, and its real and imaginary parts are cast to int16_t | ||
169 | */ | ||
170 | 2 | static inline void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t* cVector, | |
171 | const lv_8sc_t* aVector, | ||
172 | const lv_8sc_t* bVector, | ||
173 | unsigned int num_points) | ||
174 | { | ||
175 | 2 | unsigned int number = 0; | |
176 | 2 | int16_t* c16Ptr = (int16_t*)cVector; | |
177 | 2 | int8_t* a8Ptr = (int8_t*)aVector; | |
178 | 2 | int8_t* b8Ptr = (int8_t*)bVector; | |
179 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
180 | 262142 | float aReal = (float)*a8Ptr++; | |
181 | 262142 | float aImag = (float)*a8Ptr++; | |
182 | 262142 | lv_32fc_t aVal = lv_cmake(aReal, aImag); | |
183 | 262142 | float bReal = (float)*b8Ptr++; | |
184 | 262142 | float bImag = (float)*b8Ptr++; | |
185 | 262142 | lv_32fc_t bVal = lv_cmake(bReal, -bImag); | |
186 | 262142 | lv_32fc_t temp = aVal * bVal; | |
187 | |||
188 | 262142 | *c16Ptr++ = (int16_t)lv_creal(temp); | |
189 | 262142 | *c16Ptr++ = (int16_t)lv_cimag(temp); | |
190 | } | ||
191 | 2 | } | |
192 | #endif /* LV_HAVE_GENERIC */ | ||
193 | |||
194 | #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */ | ||
195 | |||
196 | #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H | ||
197 | #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H | ||
198 | |||
199 | #include <inttypes.h> | ||
200 | #include <stdio.h> | ||
201 | #include <volk/volk_complex.h> | ||
202 | |||
203 | #ifdef LV_HAVE_AVX2 | ||
204 | #include <immintrin.h> | ||
205 | /*! | ||
206 | \brief Multiplies one complex vector by the complex conjugate of a second complex vector and stores the results in a third vector (AVX2, unaligned loads and stores) | ||
207 | \param cVector The complex vector where the results will be stored | ||
208 | \param aVector One of the complex vectors to be multiplied | ||
209 | \param bVector The complex vector which will be converted to complex conjugate and multiplied | ||
210 | \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector | ||
211 | \note The vector loop handles 8 complex values per iteration; the remaining values are computed by the scalar loop at the end | ||
212 | */ | ||
213 | 2 | static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector, | |
214 | const lv_8sc_t* aVector, | ||
215 | const lv_8sc_t* bVector, | ||
216 | unsigned int num_points) | ||
217 | { | ||
218 | 2 | unsigned int number = 0; | |
219 | 2 | const unsigned int oneEigthPoints = num_points / 8; | |
220 | |||
221 | __m256i x, y, realz, imagz; | ||
222 | 2 | lv_16sc_t* c = cVector; | |
223 | 2 | const lv_8sc_t* a = aVector; | |
224 | 2 | const lv_8sc_t* b = bVector; | |
225 | __m256i conjugateSign = | ||
226 | 2 | _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1); | |
227 | |||
228 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < oneEigthPoints; number++) { |
229 | // Sign-extend 16 8 bit values (8 complex points) into 16 bit values | ||
230 | 65532 | x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a)); | |
231 | 65532 | y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b)); | |
232 | |||
233 | // Real part of a * conj(b): ar*br + ai*bi (madd sums each adjacent pair of products) | ||
234 | 32766 | realz = _mm256_madd_epi16(x, y); | |
235 | |||
236 | // Form conj(b) by negating the imaginary (odd) 16 bit elements of b | ||
237 | 32766 | y = _mm256_sign_epi16(y, conjugateSign); | |
238 | |||
239 | // Swap the real and imaginary elements within each complex value of conj(b) | ||
240 | 32766 | y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)), | |
241 | _MM_SHUFFLE(2, 3, 0, 1)); | ||
242 | |||
243 | // Imaginary part of a * conj(b): ar*(-bi) + ai*br | ||
244 | 32766 | imagz = _mm256_madd_epi16(x, y); | |
245 | |||
246 | // Interleave the real and imaginary 32 bit sums, pack them to 16 bit with signed saturation and store | ||
247 | |||
248 | 98298 | _mm256_storeu_si256((__m256i*)c, | |
249 | _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz), | ||
250 | _mm256_unpackhi_epi32(realz, imagz))); | ||
251 | |||
252 | 32766 | a += 8; | |
253 | 32766 | b += 8; | |
254 | 32766 | c += 8; | |
255 | } | ||
256 | |||
257 | 2 | number = oneEigthPoints * 8; | |
258 | 2 | int16_t* c16Ptr = (int16_t*)&cVector[number]; | |
259 | 2 | int8_t* a8Ptr = (int8_t*)&aVector[number]; | |
260 | 2 | int8_t* b8Ptr = (int8_t*)&bVector[number]; | |
261 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
262 | 14 | float aReal = (float)*a8Ptr++; | |
263 | 14 | float aImag = (float)*a8Ptr++; | |
264 | 14 | lv_32fc_t aVal = lv_cmake(aReal, aImag); | |
265 | 14 | float bReal = (float)*b8Ptr++; | |
266 | 14 | float bImag = (float)*b8Ptr++; | |
267 | 14 | lv_32fc_t bVal = lv_cmake(bReal, -bImag); | |
268 | 14 | lv_32fc_t temp = aVal * bVal; | |
269 | |||
270 | 14 | *c16Ptr++ = (int16_t)lv_creal(temp); | |
271 | 14 | *c16Ptr++ = (int16_t)lv_cimag(temp); | |
272 | } | ||
273 | 2 | } | |
274 | #endif /* LV_HAVE_AVX2 */ | ||
275 | |||
276 | #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */ | ||
277 |