GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 83 83 100.0%
Functions: 4 4 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_s32f_deinterleave_real_16i
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex floating point vector and returns the real
16 * part (in-phase) of the samples, scaled and converted to 16-bit shorts.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_s32f_deinterleave_real_16i(int16_t* iBuffer, const lv_32fc_t*
21 * complexVector, const float scalar, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li scalar: The value by which the real part of each input sample is multiplied.
26 * \li num_points: The number of complex data values to be deinterleaved.
27 *
28 * \b Outputs
29 * \li iBuffer: The in-phase (real) output samples as 16-bit integers.
30 *
31 * \b Example
32 * Generate points around the unit circle and map them to integers with
33 * magnitude 50 to preserve smallest deltas.
34 * \code
35 * int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38 * int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t)*N, alignment);
39 * float scale = 50.f;
40 *
41 * for(unsigned int ii = 0; ii < N/2; ++ii){
42 * // Generate points around the unit circle
43 * float real = -4.f * ((float)ii / (float)N) + 1.f;
44 * float imag = std::sqrt(1.f - real * real);
45 * in[ii] = lv_cmake(real, imag);
46 * in[ii+N/2] = lv_cmake(-real, -imag);
47 * }
48 *
49 * volk_32fc_s32f_deinterleave_real_16i(out, in, scale, N);
50 *
51 * for(unsigned int ii = 0; ii < N; ++ii){
52 * printf("out[%u] = %i\n", ii, out[ii]);
53 * }
54 *
55 * volk_free(in);
56 * volk_free(out);
57 * \endcode
58 */
59
60 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
61 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
62
63 #include <inttypes.h>
64 #include <stdio.h>
65 #include <volk/volk_common.h>
66
67
68 #ifdef LV_HAVE_AVX2
69 #include <immintrin.h>
70
71 static inline void
72 2 volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
73 const lv_32fc_t* complexVector,
74 const float scalar,
75 unsigned int num_points)
76 {
77 2 unsigned int number = 0;
78 2 const unsigned int eighthPoints = num_points / 8;
79
80 2 const float* complexVectorPtr = (float*)complexVector;
81 2 int16_t* iBufferPtr = iBuffer;
82
83 2 __m256 vScalar = _mm256_set1_ps(scalar);
84
85 __m256 cplxValue1, cplxValue2, iValue;
86 __m256i a;
87 __m128i b;
88
89 2 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
90
91
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
92 32766 cplxValue1 = _mm256_load_ps(complexVectorPtr);
93 32766 complexVectorPtr += 8;
94
95 32766 cplxValue2 = _mm256_load_ps(complexVectorPtr);
96 32766 complexVectorPtr += 8;
97
98 // Gather the real parts; per 128-bit lane the order is i1 i2 i5 i6 | i3 i4 i7 i8 (restored to i1..i8 by the permute below)
99 32766 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
100
101 32766 iValue = _mm256_mul_ps(iValue, vScalar);
102
103 32766 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
104 32766 a = _mm256_cvtps_epi32(iValue);
105 32766 a = _mm256_packs_epi32(a, a);
106 32766 a = _mm256_permutevar8x32_epi32(a, idx);
107 32766 b = _mm256_extracti128_si256(a, 0);
108
109 _mm_store_si128((__m128i*)iBufferPtr, b);
110 32766 iBufferPtr += 8;
111 }
112
113 2 number = eighthPoints * 8;
114 2 iBufferPtr = &iBuffer[number];
115
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
116 14 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
117 14 complexVectorPtr++;
118 }
119 2 }
120
121
122 #endif /* LV_HAVE_AVX2 */
123
124 #ifdef LV_HAVE_SSE
125 #include <xmmintrin.h>
126
127 static inline void
128 2 volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer,
129 const lv_32fc_t* complexVector,
130 const float scalar,
131 unsigned int num_points)
132 {
133 2 unsigned int number = 0;
134 2 const unsigned int quarterPoints = num_points / 4;
135
136 2 const float* complexVectorPtr = (float*)complexVector;
137 2 int16_t* iBufferPtr = iBuffer;
138
139 2 __m128 vScalar = _mm_set_ps1(scalar);
140
141 __m128 cplxValue1, cplxValue2, iValue;
142
143 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
144
145
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
146 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
147 65534 complexVectorPtr += 4;
148
149 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
150 65534 complexVectorPtr += 4;
151
152 // Arrange in i1i2i3i4 format
153 65534 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
154
155 65534 iValue = _mm_mul_ps(iValue, vScalar);
156
157 _mm_store_ps(floatBuffer, iValue);
158 65534 *iBufferPtr++ = (int16_t)(floatBuffer[0]);
159 65534 *iBufferPtr++ = (int16_t)(floatBuffer[1]);
160 65534 *iBufferPtr++ = (int16_t)(floatBuffer[2]);
161 65534 *iBufferPtr++ = (int16_t)(floatBuffer[3]);
162 }
163
164 2 number = quarterPoints * 4;
165 2 iBufferPtr = &iBuffer[number];
166
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
167 6 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
168 6 complexVectorPtr++;
169 }
170 2 }
171
172 #endif /* LV_HAVE_SSE */
173
174
175 #ifdef LV_HAVE_GENERIC
176
177 static inline void
178 2 volk_32fc_s32f_deinterleave_real_16i_generic(int16_t* iBuffer,
179 const lv_32fc_t* complexVector,
180 const float scalar,
181 unsigned int num_points)
182 {
183 2 const float* complexVectorPtr = (float*)complexVector;
184 2 int16_t* iBufferPtr = iBuffer;
185 2 unsigned int number = 0;
186
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
187 262142 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
188 262142 complexVectorPtr++;
189 }
190 2 }
191
192 #endif /* LV_HAVE_GENERIC */
193
194 #endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
195
196 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
197 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
198
199 #include <inttypes.h>
200 #include <stdio.h>
201 #include <volk/volk_common.h>
202
203 #ifdef LV_HAVE_AVX2
204 #include <immintrin.h>
205
206 static inline void
207 2 volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
208 const lv_32fc_t* complexVector,
209 const float scalar,
210 unsigned int num_points)
211 {
212 2 unsigned int number = 0;
213 2 const unsigned int eighthPoints = num_points / 8;
214
215 2 const float* complexVectorPtr = (float*)complexVector;
216 2 int16_t* iBufferPtr = iBuffer;
217
218 2 __m256 vScalar = _mm256_set1_ps(scalar);
219
220 __m256 cplxValue1, cplxValue2, iValue;
221 __m256i a;
222 __m128i b;
223
224 2 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
225
226
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
227 32766 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
228 32766 complexVectorPtr += 8;
229
230 32766 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
231 32766 complexVectorPtr += 8;
232
233 // Gather the real parts; per 128-bit lane the order is i1 i2 i5 i6 | i3 i4 i7 i8 (restored to i1..i8 by the permute below)
234 32766 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
235
236 32766 iValue = _mm256_mul_ps(iValue, vScalar);
237
238 32766 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
239 32766 a = _mm256_cvtps_epi32(iValue);
240 32766 a = _mm256_packs_epi32(a, a);
241 32766 a = _mm256_permutevar8x32_epi32(a, idx);
242 32766 b = _mm256_extracti128_si256(a, 0);
243
244 _mm_storeu_si128((__m128i*)iBufferPtr, b);
245 32766 iBufferPtr += 8;
246 }
247
248 2 number = eighthPoints * 8;
249 2 iBufferPtr = &iBuffer[number];
250
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
251 14 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
252 14 complexVectorPtr++;
253 }
254 2 }
255
256 #endif /* LV_HAVE_AVX2 */
257
258 #endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */
259