GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 115 115 100.0%
Functions: 4 4 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
11 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
12
13 #include <inttypes.h>
14 #include <stdio.h>
15 #include <volk/volk_complex.h>
16
17 #ifdef LV_HAVE_AVX2
18 #include <immintrin.h>
19 /*!
20 \brief Multiplies one complex vector by the complex conjugate of a second complex
21 vector and stores the results in a third vector \param cVector The complex vector
22 where the results will be stored \param aVector One of the complex vectors to be
23 multiplied \param bVector The complex vector which will be converted to complex
24 conjugate and multiplied \param num_points The number of complex values in aVector and
25 bVector to be multiplied together and stored into cVector
26 */
27 2 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(lv_16sc_t* cVector,
28 const lv_8sc_t* aVector,
29 const lv_8sc_t* bVector,
30 unsigned int num_points)
31 {
32 2 unsigned int number = 0;
33 2 const unsigned int quarterPoints = num_points / 8;
34
35 __m256i x, y, realz, imagz;
36 2 lv_16sc_t* c = cVector;
37 2 const lv_8sc_t* a = aVector;
38 2 const lv_8sc_t* b = bVector;
39 __m256i conjugateSign =
40 2 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
41
42
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < quarterPoints; number++) {
43 // Convert 8 bit values into 16 bit values
44 65532 x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
45 65532 y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
46
47 // Calculate the ar*cr - ai*(-ci) portions
48 32766 realz = _mm256_madd_epi16(x, y);
49
50 // Calculate the complex conjugate of the cr + ci j values
51 32766 y = _mm256_sign_epi16(y, conjugateSign);
52
53 // Shift the order of the cr and ci values
54 32766 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
55 _MM_SHUFFLE(2, 3, 0, 1));
56
57 // Calculate the ar*(-ci) + cr*(ai)
58 32766 imagz = _mm256_madd_epi16(x, y);
59
60 // Interleave the real and imaginary sums, pack them to 16 bit with saturation and store
61
62 98298 _mm256_store_si256((__m256i*)c,
63 _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
64 _mm256_unpackhi_epi32(realz, imagz)));
65
66 32766 a += 8;
67 32766 b += 8;
68 32766 c += 8;
69 }
70
71 2 number = quarterPoints * 8;
72 2 int16_t* c16Ptr = (int16_t*)&cVector[number];
73 2 int8_t* a8Ptr = (int8_t*)&aVector[number];
74 2 int8_t* b8Ptr = (int8_t*)&bVector[number];
75
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
76 14 float aReal = (float)*a8Ptr++;
77 14 float aImag = (float)*a8Ptr++;
78 14 lv_32fc_t aVal = lv_cmake(aReal, aImag);
79 14 float bReal = (float)*b8Ptr++;
80 14 float bImag = (float)*b8Ptr++;
81 14 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
82 14 lv_32fc_t temp = aVal * bVal;
83
84 14 *c16Ptr++ = (int16_t)lv_creal(temp);
85 14 *c16Ptr++ = (int16_t)lv_cimag(temp);
86 }
87 2 }
88 #endif /* LV_HAVE_AVX2 */
89
90
91 #ifdef LV_HAVE_SSE4_1
92 #include <smmintrin.h>
93 /*!
94 \brief Multiplies one complex vector by the complex conjugate of a second complex
95 vector and stores the results in a third vector \param cVector The complex vector
96 where the results will be stored \param aVector One of the complex vectors to be
97 multiplied \param bVector The complex vector which will be converted to complex
98 conjugate and multiplied \param num_points The number of complex values in aVector and
99 bVector to be multiplied together and stored into cVector
100 */
101 2 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector,
102 const lv_8sc_t* aVector,
103 const lv_8sc_t* bVector,
104 unsigned int num_points)
105 {
106 2 unsigned int number = 0;
107 2 const unsigned int quarterPoints = num_points / 4;
108
109 __m128i x, y, realz, imagz;
110 2 lv_16sc_t* c = cVector;
111 2 const lv_8sc_t* a = aVector;
112 2 const lv_8sc_t* b = bVector;
113 2 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
114
115
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
116 // Convert 8 bit values into 16 bit values
117 131068 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
118 131068 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
119
120 // Calculate the ar*cr - ai*(-ci) portions
121 65534 realz = _mm_madd_epi16(x, y);
122
123 // Calculate the complex conjugate of the cr + ci j values
124 65534 y = _mm_sign_epi16(y, conjugateSign);
125
126 // Shift the order of the cr and ci values
127 65534 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
128 _MM_SHUFFLE(2, 3, 0, 1));
129
130 // Calculate the ar*(-ci) + cr*(ai)
131 65534 imagz = _mm_madd_epi16(x, y);
132
133 196602 _mm_store_si128((__m128i*)c,
134 _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz),
135 _mm_unpackhi_epi32(realz, imagz)));
136
137 65534 a += 4;
138 65534 b += 4;
139 65534 c += 4;
140 }
141
142 2 number = quarterPoints * 4;
143 2 int16_t* c16Ptr = (int16_t*)&cVector[number];
144 2 int8_t* a8Ptr = (int8_t*)&aVector[number];
145 2 int8_t* b8Ptr = (int8_t*)&bVector[number];
146
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
147 6 float aReal = (float)*a8Ptr++;
148 6 float aImag = (float)*a8Ptr++;
149 6 lv_32fc_t aVal = lv_cmake(aReal, aImag);
150 6 float bReal = (float)*b8Ptr++;
151 6 float bImag = (float)*b8Ptr++;
152 6 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
153 6 lv_32fc_t temp = aVal * bVal;
154
155 6 *c16Ptr++ = (int16_t)lv_creal(temp);
156 6 *c16Ptr++ = (int16_t)lv_cimag(temp);
157 }
158 2 }
159 #endif /* LV_HAVE_SSE4_1 */
160
161 #ifdef LV_HAVE_GENERIC
162 /*!
163 \brief Multiplies one complex vector by the complex conjugate of a second complex
164 vector and stores the results in a third vector \param cVector The complex vector
165 where the results will be stored \param aVector One of the complex vectors to be
166 multiplied \param bVector The complex vector which will be converted to complex
167 conjugate and multiplied \param num_points The number of complex values in aVector and
168 bVector to be multiplied together and stored into cVector
169 */
170 2 static inline void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t* cVector,
171 const lv_8sc_t* aVector,
172 const lv_8sc_t* bVector,
173 unsigned int num_points)
174 {
175 2 unsigned int number = 0;
176 2 int16_t* c16Ptr = (int16_t*)cVector;
177 2 int8_t* a8Ptr = (int8_t*)aVector;
178 2 int8_t* b8Ptr = (int8_t*)bVector;
179
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
180 262142 float aReal = (float)*a8Ptr++;
181 262142 float aImag = (float)*a8Ptr++;
182 262142 lv_32fc_t aVal = lv_cmake(aReal, aImag);
183 262142 float bReal = (float)*b8Ptr++;
184 262142 float bImag = (float)*b8Ptr++;
185 262142 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
186 262142 lv_32fc_t temp = aVal * bVal;
187
188 262142 *c16Ptr++ = (int16_t)lv_creal(temp);
189 262142 *c16Ptr++ = (int16_t)lv_cimag(temp);
190 }
191 2 }
192 #endif /* LV_HAVE_GENERIC */
193
194 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
195
196 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
197 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
198
199 #include <inttypes.h>
200 #include <stdio.h>
201 #include <volk/volk_complex.h>
202
203 #ifdef LV_HAVE_AVX2
204 #include <immintrin.h>
205 /*!
206 \brief Multiplies one complex vector by the complex conjugate of a second complex
207 vector and stores the results in a third vector \param cVector The complex vector
208 where the results will be stored \param aVector One of the complex vectors to be
209 multiplied \param bVector The complex vector which will be converted to complex
210 conjugate and multiplied \param num_points The number of complex values in aVector and
211 bVector to be multiplied together and stored into cVector
212 */
213 2 static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector,
214 const lv_8sc_t* aVector,
215 const lv_8sc_t* bVector,
216 unsigned int num_points)
217 {
218 2 unsigned int number = 0;
219 2 const unsigned int oneEigthPoints = num_points / 8;
220
221 __m256i x, y, realz, imagz;
222 2 lv_16sc_t* c = cVector;
223 2 const lv_8sc_t* a = aVector;
224 2 const lv_8sc_t* b = bVector;
225 __m256i conjugateSign =
226 2 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
227
228
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < oneEigthPoints; number++) {
229 // Convert 8 bit values into 16 bit values
230 65532 x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
231 65532 y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
232
233 // Calculate the ar*cr - ai*(-ci) portions
234 32766 realz = _mm256_madd_epi16(x, y);
235
236 // Calculate the complex conjugate of the cr + ci j values
237 32766 y = _mm256_sign_epi16(y, conjugateSign);
238
239 // Shift the order of the cr and ci values
240 32766 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
241 _MM_SHUFFLE(2, 3, 0, 1));
242
243 // Calculate the ar*(-ci) + cr*(ai)
244 32766 imagz = _mm256_madd_epi16(x, y);
245
246 // Interleave the real and imaginary sums, pack them to 16 bit with saturation and store
247
248 98298 _mm256_storeu_si256((__m256i*)c,
249 _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
250 _mm256_unpackhi_epi32(realz, imagz)));
251
252 32766 a += 8;
253 32766 b += 8;
254 32766 c += 8;
255 }
256
257 2 number = oneEigthPoints * 8;
258 2 int16_t* c16Ptr = (int16_t*)&cVector[number];
259 2 int8_t* a8Ptr = (int8_t*)&aVector[number];
260 2 int8_t* b8Ptr = (int8_t*)&bVector[number];
261
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
262 14 float aReal = (float)*a8Ptr++;
263 14 float aImag = (float)*a8Ptr++;
264 14 lv_32fc_t aVal = lv_cmake(aReal, aImag);
265 14 float bReal = (float)*b8Ptr++;
266 14 float bImag = (float)*b8Ptr++;
267 14 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
268 14 lv_32fc_t temp = aVal * bVal;
269
270 14 *c16Ptr++ = (int16_t)lv_creal(temp);
271 14 *c16Ptr++ = (int16_t)lv_cimag(temp);
272 }
273 2 }
274 #endif /* LV_HAVE_AVX2 */
275
276 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */
277