GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_x2_s32f_interleave_16ic.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 117 117 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_x2_s32f_interleave_16ic
12 *
13 * \b Overview
14 *
15 * Takes input vector iBuffer as the real (inphase) part and input
16 * vector qBuffer as the imag (quadrature) part and combines them into
17 * a complex output vector. The output is scaled by the input scalar
18 * value and converted to a 16-bit short complex number.
19 *
20 * <b>Dispatcher Prototype</b>
21 * \code
22 * void volk_32f_x2_s32f_interleave_16ic(lv_16sc_t* complexVector, const float* iBuffer,
23 * const float* qBuffer, const float scalar, unsigned int num_points) \endcode
24 *
25 * \b Inputs
26 * \li iBuffer: Input vector of samples for the real part.
27 * \li qBuffer: Input vector of samples for the imaginary part.
28 * \li scalar: The scalar value used to scale the values before converting to shorts.
29 * \li num_points: The number of values in both input vectors.
30 *
31 * \b Outputs
32 * \li complexVector: The output vector of complex numbers.
33 *
34 * \b Example
35 * Generate points around the unit circle and convert to complex integers.
36 * \code
37 * int N = 10;
38 * unsigned int alignment = volk_get_alignment();
39 * float* imag = (float*)volk_malloc(sizeof(float)*N, alignment);
40 * float* real = (float*)volk_malloc(sizeof(float)*N, alignment);
41 * lv_16sc_t* out = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t)*N, alignment);
42 *
43 * for(unsigned int ii = 0; ii < N; ++ii){
44 * real[ii] = 2.f * ((float)ii / (float)N) - 1.f;
45 * imag[ii] = std::sqrt(1.f - real[ii] * real[ii]);
46 * }
47 * // Normalize by smallest delta (0.02 in this example)
48 * float scale = 50.f;
49 *
50 * volk_32f_x2_s32f_interleave_16ic(out, real, imag, scale, N);
51 *
52 * for(unsigned int ii = 0; ii < N; ++ii){
53 * printf("out[%u] = %i + %ij\n", ii, std::real(out[ii]), std::imag(out[ii]));
54 * }
55 *
56 * volk_free(imag);
57 * volk_free(real);
58 * volk_free(out);
59 * \endcode
60 */
61
62 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
63 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
64
65 #include <inttypes.h>
66 #include <stdio.h>
67 #include <volk/volk_common.h>
68
69 #ifdef LV_HAVE_AVX2
70 #include <immintrin.h>
71
72 2 static inline void volk_32f_x2_s32f_interleave_16ic_a_avx2(lv_16sc_t* complexVector,
73 const float* iBuffer,
74 const float* qBuffer,
75 const float scalar,
76 unsigned int num_points)
77 {
78 2 unsigned int number = 0;
79 2 const float* iBufferPtr = iBuffer;
80 2 const float* qBufferPtr = qBuffer;
81
82 2 __m256 vScalar = _mm256_set1_ps(scalar);
83
84 2 const unsigned int eighthPoints = num_points / 8;
85
86 __m256 iValue, qValue, cplxValue1, cplxValue2;
87 __m256i intValue1, intValue2;
88
89 2 int16_t* complexVectorPtr = (int16_t*)complexVector;
90
91
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
92 32766 iValue = _mm256_load_ps(iBufferPtr);
93 32766 qValue = _mm256_load_ps(qBufferPtr);
94
95 // Interleaves the lower two values in the i and q variables into one buffer
96 32766 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
97 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
98
99 // Interleaves the upper two values in the i and q variables into one buffer
100 32766 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
101 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
102
103 32766 intValue1 = _mm256_cvtps_epi32(cplxValue1);
104 32766 intValue2 = _mm256_cvtps_epi32(cplxValue2);
105
106 32766 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
107
108 _mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
109 32766 complexVectorPtr += 16;
110
111 32766 iBufferPtr += 8;
112 32766 qBufferPtr += 8;
113 }
114
115 2 number = eighthPoints * 8;
116 2 complexVectorPtr = (int16_t*)(&complexVector[number]);
117
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
118 14 *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
119 14 *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
120 }
121 2 }
122 #endif /* LV_HAVE_AVX2 */
123
124
125 #ifdef LV_HAVE_SSE2
126 #include <emmintrin.h>
127
128 2 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector,
129 const float* iBuffer,
130 const float* qBuffer,
131 const float scalar,
132 unsigned int num_points)
133 {
134 2 unsigned int number = 0;
135 2 const float* iBufferPtr = iBuffer;
136 2 const float* qBufferPtr = qBuffer;
137
138 2 __m128 vScalar = _mm_set_ps1(scalar);
139
140 2 const unsigned int quarterPoints = num_points / 4;
141
142 __m128 iValue, qValue, cplxValue1, cplxValue2;
143 __m128i intValue1, intValue2;
144
145 2 int16_t* complexVectorPtr = (int16_t*)complexVector;
146
147
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
148 65534 iValue = _mm_load_ps(iBufferPtr);
149 65534 qValue = _mm_load_ps(qBufferPtr);
150
151 // Interleaves the lower two values in the i and q variables into one buffer
152 65534 cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
153 65534 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
154
155 // Interleaves the upper two values in the i and q variables into one buffer
156 65534 cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
157 65534 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
158
159 65534 intValue1 = _mm_cvtps_epi32(cplxValue1);
160 65534 intValue2 = _mm_cvtps_epi32(cplxValue2);
161
162 65534 intValue1 = _mm_packs_epi32(intValue1, intValue2);
163
164 _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
165 65534 complexVectorPtr += 8;
166
167 65534 iBufferPtr += 4;
168 65534 qBufferPtr += 4;
169 }
170
171 2 number = quarterPoints * 4;
172 2 complexVectorPtr = (int16_t*)(&complexVector[number]);
173
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
174 6 *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
175 6 *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
176 }
177 2 }
178 #endif /* LV_HAVE_SSE2 */
179
180
181 #ifdef LV_HAVE_SSE
182 #include <xmmintrin.h>
183
184 2 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector,
185 const float* iBuffer,
186 const float* qBuffer,
187 const float scalar,
188 unsigned int num_points)
189 {
190 2 unsigned int number = 0;
191 2 const float* iBufferPtr = iBuffer;
192 2 const float* qBufferPtr = qBuffer;
193
194 2 __m128 vScalar = _mm_set_ps1(scalar);
195
196 2 const unsigned int quarterPoints = num_points / 4;
197
198 __m128 iValue, qValue, cplxValue;
199
200 2 int16_t* complexVectorPtr = (int16_t*)complexVector;
201
202 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
203
204
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
205 65534 iValue = _mm_load_ps(iBufferPtr);
206 65534 qValue = _mm_load_ps(qBufferPtr);
207
208 // Interleaves the lower two values in the i and q variables into one buffer
209 65534 cplxValue = _mm_unpacklo_ps(iValue, qValue);
210 65534 cplxValue = _mm_mul_ps(cplxValue, vScalar);
211
212 _mm_store_ps(floatBuffer, cplxValue);
213
214 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
215 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
216 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
217 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
218
219 // Interleaves the upper two values in the i and q variables into one buffer
220 65534 cplxValue = _mm_unpackhi_ps(iValue, qValue);
221 65534 cplxValue = _mm_mul_ps(cplxValue, vScalar);
222
223 _mm_store_ps(floatBuffer, cplxValue);
224
225 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
226 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
227 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
228 65534 *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
229
230 65534 iBufferPtr += 4;
231 65534 qBufferPtr += 4;
232 }
233
234 2 number = quarterPoints * 4;
235 2 complexVectorPtr = (int16_t*)(&complexVector[number]);
236
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
237 6 *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
238 6 *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
239 }
240 2 }
241 #endif /* LV_HAVE_SSE */
242
243
244 #ifdef LV_HAVE_GENERIC
245
246 2 static inline void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t* complexVector,
247 const float* iBuffer,
248 const float* qBuffer,
249 const float scalar,
250 unsigned int num_points)
251 {
252 2 int16_t* complexVectorPtr = (int16_t*)complexVector;
253 2 const float* iBufferPtr = iBuffer;
254 2 const float* qBufferPtr = qBuffer;
255 2 unsigned int number = 0;
256
257
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
258 262142 *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
259 262142 *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
260 }
261 2 }
262 #endif /* LV_HAVE_GENERIC */
263
264
265 #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */
266
267 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
268 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
269
270 #include <inttypes.h>
271 #include <stdio.h>
272 #include <volk/volk_common.h>
273
274 #ifdef LV_HAVE_AVX2
275 #include <immintrin.h>
276
277 2 static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(lv_16sc_t* complexVector,
278 const float* iBuffer,
279 const float* qBuffer,
280 const float scalar,
281 unsigned int num_points)
282 {
283 2 unsigned int number = 0;
284 2 const float* iBufferPtr = iBuffer;
285 2 const float* qBufferPtr = qBuffer;
286
287 2 __m256 vScalar = _mm256_set1_ps(scalar);
288
289 2 const unsigned int eighthPoints = num_points / 8;
290
291 __m256 iValue, qValue, cplxValue1, cplxValue2;
292 __m256i intValue1, intValue2;
293
294 2 int16_t* complexVectorPtr = (int16_t*)complexVector;
295
296
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
297 32766 iValue = _mm256_loadu_ps(iBufferPtr);
298 32766 qValue = _mm256_loadu_ps(qBufferPtr);
299
300 // Interleaves the lower two values in the i and q variables into one buffer
301 32766 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
302 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
303
304 // Interleaves the upper two values in the i and q variables into one buffer
305 32766 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
306 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
307
308 32766 intValue1 = _mm256_cvtps_epi32(cplxValue1);
309 32766 intValue2 = _mm256_cvtps_epi32(cplxValue2);
310
311 32766 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
312
313 _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
314 32766 complexVectorPtr += 16;
315
316 32766 iBufferPtr += 8;
317 32766 qBufferPtr += 8;
318 }
319
320 2 number = eighthPoints * 8;
321 2 complexVectorPtr = (int16_t*)(&complexVector[number]);
322
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
323 14 *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
324 14 *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
325 }
326 2 }
327 #endif /* LV_HAVE_AVX2 */
328
329
330 #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H */
331