GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_16ic_convert_32fc.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 107 107 100.0%
Functions: 7 7 100.0%
Branches: 24 26 92.3%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2016 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_16ic_convert_32fc
12 *
13 * \b Overview
14 *
15 * Converts a complex vector with 16-bit integer components
16 * into a complex vector with 32-bit float components.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_16ic_convert_32fc(lv_32fc_t* outputVector, const lv_16sc_t* inputVector,
21 * unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li inputVector: The complex 16-bit integer input data buffer.
25 * \li num_points: The number of data values to be converted.
26 *
27 * \b Outputs
28 * \li outputVector: The complex 32-bit float output data buffer.
29 *
30 * \b Example
31 * \code
32 * int N = 10000;
33 *
34 * unsigned int alignment = volk_get_alignment();
35 * lv_16sc_t* input = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t)*N, alignment);
36 * lv_32fc_t* output = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
37 * volk_16ic_convert_32fc(output, input, N);
38 *
39 * volk_free(input);
40 * volk_free(output);
41 * \endcode
42 */
43
44
45 #ifndef INCLUDED_volk_16ic_convert_32fc_a_H
46 #define INCLUDED_volk_16ic_convert_32fc_a_H
47
48 #include <volk/volk_complex.h>
49
50 #ifdef LV_HAVE_AVX2
51 #include <immintrin.h>
52
53 2 static inline void volk_16ic_convert_32fc_a_avx2(lv_32fc_t* outputVector,
54 const lv_16sc_t* inputVector,
55 unsigned int num_points)
56 {
57 2 const unsigned int avx_iters = num_points / 8;
58 2 unsigned int number = 0;
59 2 const int16_t* complexVectorPtr = (int16_t*)inputVector;
60 2 float* outputVectorPtr = (float*)outputVector;
61 __m256 outVal;
62 __m256i outValInt;
63 __m128i cplxValue;
64
65
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < avx_iters; number++) {
66 32766 cplxValue = _mm_load_si128((__m128i*)complexVectorPtr);
67 32766 complexVectorPtr += 8;
68
69 32766 outValInt = _mm256_cvtepi16_epi32(cplxValue);
70 32766 outVal = _mm256_cvtepi32_ps(outValInt);
71 _mm256_store_ps((float*)outputVectorPtr, outVal);
72
73 32766 outputVectorPtr += 8;
74 }
75
76 2 number = avx_iters * 8;
77
2/2
✓ Branch 0 taken 262156 times.
✓ Branch 1 taken 2 times.
262158 for (; number < num_points * 2; number++) {
78 262156 *outputVectorPtr++ = (float)*complexVectorPtr++;
79 }
80 2 }
81
82 #endif /* LV_HAVE_AVX2 */
83
84 #ifdef LV_HAVE_GENERIC
85
86 2 static inline void volk_16ic_convert_32fc_generic(lv_32fc_t* outputVector,
87 const lv_16sc_t* inputVector,
88 unsigned int num_points)
89 {
90 unsigned int i;
91
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (i = 0; i < num_points; i++) {
92 262142 outputVector[i] =
93 262142 lv_cmake((float)lv_creal(inputVector[i]), (float)lv_cimag(inputVector[i]));
94 }
95 2 }
96
97 #endif /* LV_HAVE_GENERIC */
98
99
100 #ifdef LV_HAVE_SSE2
101 #include <emmintrin.h>
102
103 2 static inline void volk_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector,
104 const lv_16sc_t* inputVector,
105 unsigned int num_points)
106 {
107 2 const unsigned int sse_iters = num_points / 2;
108
109 2 const lv_16sc_t* _in = inputVector;
110 2 lv_32fc_t* _out = outputVector;
111 __m128 a;
112 unsigned int number;
113
114
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (number = 0; number < sse_iters; number++) {
115 131070 a = _mm_set_ps(
116 131070 (float)(lv_cimag(_in[1])),
117 131070 (float)(lv_creal(_in[1])),
118 131070 (float)(lv_cimag(_in[0])),
119 131070 (float)(lv_creal(
120 _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
121 _mm_store_ps((float*)_out, a);
122 131070 _in += 2;
123 131070 _out += 2;
124 }
125
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (num_points & 1) {
126 2 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
127 2 _in++;
128 }
129 2 }
130
131 #endif /* LV_HAVE_SSE2 */
132
133 #ifdef LV_HAVE_AVX
134 #include <immintrin.h>
135
136 2 static inline void volk_16ic_convert_32fc_a_avx(lv_32fc_t* outputVector,
137 const lv_16sc_t* inputVector,
138 unsigned int num_points)
139 {
140 2 const unsigned int sse_iters = num_points / 4;
141
142 2 const lv_16sc_t* _in = inputVector;
143 2 lv_32fc_t* _out = outputVector;
144 __m256 a;
145 unsigned int i, number;
146
147
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (number = 0; number < sse_iters; number++) {
148 65534 a = _mm256_set_ps(
149 65534 (float)(lv_cimag(_in[3])),
150 65534 (float)(lv_creal(_in[3])),
151 65534 (float)(lv_cimag(_in[2])),
152 65534 (float)(lv_creal(_in[2])),
153 65534 (float)(lv_cimag(_in[1])),
154 65534 (float)(lv_creal(_in[1])),
155 65534 (float)(lv_cimag(_in[0])),
156 65534 (float)(lv_creal(
157 _in[0]))); // load (2 byte real, 2 byte imag) x 4 as floats into 256 bits reg
158 _mm256_store_ps((float*)_out, a);
159 65534 _in += 4;
160 65534 _out += 4;
161 }
162
163
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (i = 0; i < (num_points % 4); ++i) {
164 6 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
165 6 _in++;
166 }
167 2 }
168
169 #endif /* LV_HAVE_AVX */
170
171
172 #ifdef LV_HAVE_NEON
173 #include <arm_neon.h>
174
175 static inline void volk_16ic_convert_32fc_neon(lv_32fc_t* outputVector,
176 const lv_16sc_t* inputVector,
177 unsigned int num_points)
178 {
179 const unsigned int sse_iters = num_points / 2;
180
181 const lv_16sc_t* _in = inputVector;
182 lv_32fc_t* _out = outputVector;
183
184 int16x4_t a16x4;
185 int32x4_t a32x4;
186 float32x4_t f32x4;
187 unsigned int i, number;
188
189 for (number = 0; number < sse_iters; number++) {
190 a16x4 = vld1_s16((const int16_t*)_in);
191 __VOLK_PREFETCH(_in + 4);
192 a32x4 = vmovl_s16(a16x4);
193 f32x4 = vcvtq_f32_s32(a32x4);
194 vst1q_f32((float32_t*)_out, f32x4);
195 _in += 2;
196 _out += 2;
197 }
198 for (i = 0; i < (num_points % 2); ++i) {
199 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
200 _in++;
201 }
202 }
203 #endif /* LV_HAVE_NEON */
204
205 #endif /* INCLUDED_volk_16ic_convert_32fc_a_H */
206
207 #ifndef INCLUDED_volk_16ic_convert_32fc_u_H
208 #define INCLUDED_volk_16ic_convert_32fc_u_H
209
210 #include <volk/volk_complex.h>
211
212
213 #ifdef LV_HAVE_AVX2
214 #include <immintrin.h>
215
216 2 static inline void volk_16ic_convert_32fc_u_avx2(lv_32fc_t* outputVector,
217 const lv_16sc_t* inputVector,
218 unsigned int num_points)
219 {
220 2 const unsigned int avx_iters = num_points / 8;
221 2 unsigned int number = 0;
222 2 const int16_t* complexVectorPtr = (int16_t*)inputVector;
223 2 float* outputVectorPtr = (float*)outputVector;
224 __m256 outVal;
225 __m256i outValInt;
226 __m128i cplxValue;
227
228
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < avx_iters; number++) {
229 32766 cplxValue = _mm_loadu_si128((__m128i*)complexVectorPtr);
230 32766 complexVectorPtr += 8;
231
232 32766 outValInt = _mm256_cvtepi16_epi32(cplxValue);
233 32766 outVal = _mm256_cvtepi32_ps(outValInt);
234 _mm256_storeu_ps((float*)outputVectorPtr, outVal);
235
236 32766 outputVectorPtr += 8;
237 }
238
239 2 number = avx_iters * 8;
240
2/2
✓ Branch 0 taken 262156 times.
✓ Branch 1 taken 2 times.
262158 for (; number < num_points * 2; number++) {
241 262156 *outputVectorPtr++ = (float)*complexVectorPtr++;
242 }
243 2 }
244
245 #endif /* LV_HAVE_AVX2 */
246
247 #ifdef LV_HAVE_SSE2
248 #include <emmintrin.h>
249
250 2 static inline void volk_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector,
251 const lv_16sc_t* inputVector,
252 unsigned int num_points)
253 {
254 2 const unsigned int sse_iters = num_points / 2;
255
256 2 const lv_16sc_t* _in = inputVector;
257 2 lv_32fc_t* _out = outputVector;
258 __m128 a;
259 unsigned int number;
260
261
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (number = 0; number < sse_iters; number++) {
262 131070 a = _mm_set_ps(
263 131070 (float)(lv_cimag(_in[1])),
264 131070 (float)(lv_creal(_in[1])),
265 131070 (float)(lv_cimag(_in[0])),
266 131070 (float)(lv_creal(
267 _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
268 _mm_storeu_ps((float*)_out, a);
269 131070 _in += 2;
270 131070 _out += 2;
271 }
272
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (num_points & 1) {
273 2 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
274 2 _in++;
275 }
276 2 }
277
278 #endif /* LV_HAVE_SSE2 */
279
280
281 #ifdef LV_HAVE_AVX
282 #include <immintrin.h>
283
284 2 static inline void volk_16ic_convert_32fc_u_avx(lv_32fc_t* outputVector,
285 const lv_16sc_t* inputVector,
286 unsigned int num_points)
287 {
288 2 const unsigned int sse_iters = num_points / 4;
289
290 2 const lv_16sc_t* _in = inputVector;
291 2 lv_32fc_t* _out = outputVector;
292 __m256 a;
293 unsigned int i, number;
294
295
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (number = 0; number < sse_iters; number++) {
296 65534 a = _mm256_set_ps(
297 65534 (float)(lv_cimag(_in[3])),
298 65534 (float)(lv_creal(_in[3])),
299 65534 (float)(lv_cimag(_in[2])),
300 65534 (float)(lv_creal(_in[2])),
301 65534 (float)(lv_cimag(_in[1])),
302 65534 (float)(lv_creal(_in[1])),
303 65534 (float)(lv_cimag(_in[0])),
304 65534 (float)(lv_creal(
305 _in[0]))); // load (2 byte real, 2 byte imag) x 4 as floats into 256 bits reg
306 _mm256_storeu_ps((float*)_out, a);
307 65534 _in += 4;
308 65534 _out += 4;
309 }
310
311
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (i = 0; i < (num_points % 4); ++i) {
312 6 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
313 6 _in++;
314 }
315 2 }
316
317 #endif /* LV_HAVE_AVX */
318 #endif /* INCLUDED_volk_16ic_convert_32fc_u_H */
319