Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_32f_x2_s32f_interleave_16ic | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Takes input vector iBuffer as the real (inphase) part and input | ||
16 | * vector qBuffer as the imag (quadrature) part and combines them into | ||
17 | * a complex output vector. The output is scaled by the input scalar | ||
18 | * value and converted to a 16-bit short complex number. | ||
19 | * | ||
20 | * <b>Dispatcher Prototype</b> | ||
21 | * \code | ||
22 | * void volk_32f_x2_s32f_interleave_16ic(lv_16sc_t* complexVector, const float* iBuffer, | ||
23 | * const float* qBuffer, const float scalar, unsigned int num_points) \endcode | ||
24 | * | ||
25 | * \b Inputs | ||
26 | * \li iBuffer: Input vector of samples for the real part. | ||
27 | * \li qBuffer: Input vector of samples for the imaginary part. | ||
28 | * \li scalar: The scalar value used to scale the values before converting to shorts. | ||
29 | * \li num_points: The number of values in both input vectors. | ||
30 | * | ||
31 | * \b Outputs | ||
32 | * \li complexVector: The output vector of complex numbers. | ||
33 | * | ||
34 | * \b Example | ||
35 | * Generate points around the unit circle and convert to complex integers. | ||
36 | * \code | ||
37 | * int N = 10; | ||
38 | * unsigned int alignment = volk_get_alignment(); | ||
39 | * float* imag = (float*)volk_malloc(sizeof(float)*N, alignment); | ||
40 | * float* real = (float*)volk_malloc(sizeof(float)*N, alignment); | ||
41 | * lv_16sc_t* out = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t)*N, alignment); | ||
42 | * | ||
43 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
44 | * real[ii] = 2.f * ((float)ii / (float)N) - 1.f; | ||
45 | * imag[ii] = std::sqrt(1.f - real[ii] * real[ii]); | ||
46 | * } | ||
47 | * // Normalize by smallest delta (0.02 in this example) | ||
48 | * float scale = 50.f; | ||
49 | * | ||
50 | * volk_32f_x2_s32f_interleave_16ic(out, real, imag, scale, N); | ||
51 | * | ||
52 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
53 | * printf("out[%u] = %i + %ij\n", ii, std::real(out[ii]), std::imag(out[ii])); | ||
54 | * } | ||
55 | * | ||
56 | * volk_free(imag); | ||
57 | * volk_free(real); | ||
58 | * volk_free(out); | ||
59 | * \endcode | ||
60 | */ | ||
61 | |||
62 | #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H | ||
63 | #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H | ||
64 | |||
65 | #include <inttypes.h> | ||
66 | #include <stdio.h> | ||
67 | #include <volk/volk_common.h> | ||
68 | |||
69 | #ifdef LV_HAVE_AVX2 | ||
70 | #include <immintrin.h> | ||
71 | |||
72 | 2 | static inline void volk_32f_x2_s32f_interleave_16ic_a_avx2(lv_16sc_t* complexVector, | |
73 | const float* iBuffer, | ||
74 | const float* qBuffer, | ||
75 | const float scalar, | ||
76 | unsigned int num_points) | ||
77 | { | ||
78 | 2 | unsigned int number = 0; | |
79 | 2 | const float* iBufferPtr = iBuffer; | |
80 | 2 | const float* qBufferPtr = qBuffer; | |
81 | |||
82 | 2 | __m256 vScalar = _mm256_set1_ps(scalar); | |
83 | |||
84 | 2 | const unsigned int eighthPoints = num_points / 8; | |
85 | |||
86 | __m256 iValue, qValue, cplxValue1, cplxValue2; | ||
87 | __m256i intValue1, intValue2; | ||
88 | |||
89 | 2 | int16_t* complexVectorPtr = (int16_t*)complexVector; | |
90 | |||
91 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < eighthPoints; number++) { |
92 | 32766 | iValue = _mm256_load_ps(iBufferPtr); | |
93 | 32766 | qValue = _mm256_load_ps(qBufferPtr); | |
94 | |||
95 | // Interleaves the lower two values in the i and q variables into one buffer | ||
96 | 32766 | cplxValue1 = _mm256_unpacklo_ps(iValue, qValue); | |
97 | 32766 | cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar); | |
98 | |||
99 | // Interleaves the upper two values in the i and q variables into one buffer | ||
100 | 32766 | cplxValue2 = _mm256_unpackhi_ps(iValue, qValue); | |
101 | 32766 | cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar); | |
102 | |||
103 | 32766 | intValue1 = _mm256_cvtps_epi32(cplxValue1); | |
104 | 32766 | intValue2 = _mm256_cvtps_epi32(cplxValue2); | |
105 | |||
106 | 32766 | intValue1 = _mm256_packs_epi32(intValue1, intValue2); | |
107 | |||
108 | _mm256_store_si256((__m256i*)complexVectorPtr, intValue1); | ||
109 | 32766 | complexVectorPtr += 16; | |
110 | |||
111 | 32766 | iBufferPtr += 8; | |
112 | 32766 | qBufferPtr += 8; | |
113 | } | ||
114 | |||
115 | 2 | number = eighthPoints * 8; | |
116 | 2 | complexVectorPtr = (int16_t*)(&complexVector[number]); | |
117 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
118 | 14 | *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar); | |
119 | 14 | *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar); | |
120 | } | ||
121 | 2 | } | |
122 | #endif /* LV_HAVE_AVX2 */ | ||
123 | |||
124 | |||
125 | #ifdef LV_HAVE_SSE2 | ||
126 | #include <emmintrin.h> | ||
127 | |||
128 | 2 | static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector, | |
129 | const float* iBuffer, | ||
130 | const float* qBuffer, | ||
131 | const float scalar, | ||
132 | unsigned int num_points) | ||
133 | { | ||
134 | 2 | unsigned int number = 0; | |
135 | 2 | const float* iBufferPtr = iBuffer; | |
136 | 2 | const float* qBufferPtr = qBuffer; | |
137 | |||
138 | 2 | __m128 vScalar = _mm_set_ps1(scalar); | |
139 | |||
140 | 2 | const unsigned int quarterPoints = num_points / 4; | |
141 | |||
142 | __m128 iValue, qValue, cplxValue1, cplxValue2; | ||
143 | __m128i intValue1, intValue2; | ||
144 | |||
145 | 2 | int16_t* complexVectorPtr = (int16_t*)complexVector; | |
146 | |||
147 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
148 | 65534 | iValue = _mm_load_ps(iBufferPtr); | |
149 | 65534 | qValue = _mm_load_ps(qBufferPtr); | |
150 | |||
151 | // Interleaves the lower two values in the i and q variables into one buffer | ||
152 | 65534 | cplxValue1 = _mm_unpacklo_ps(iValue, qValue); | |
153 | 65534 | cplxValue1 = _mm_mul_ps(cplxValue1, vScalar); | |
154 | |||
155 | // Interleaves the upper two values in the i and q variables into one buffer | ||
156 | 65534 | cplxValue2 = _mm_unpackhi_ps(iValue, qValue); | |
157 | 65534 | cplxValue2 = _mm_mul_ps(cplxValue2, vScalar); | |
158 | |||
159 | 65534 | intValue1 = _mm_cvtps_epi32(cplxValue1); | |
160 | 65534 | intValue2 = _mm_cvtps_epi32(cplxValue2); | |
161 | |||
162 | 65534 | intValue1 = _mm_packs_epi32(intValue1, intValue2); | |
163 | |||
164 | _mm_store_si128((__m128i*)complexVectorPtr, intValue1); | ||
165 | 65534 | complexVectorPtr += 8; | |
166 | |||
167 | 65534 | iBufferPtr += 4; | |
168 | 65534 | qBufferPtr += 4; | |
169 | } | ||
170 | |||
171 | 2 | number = quarterPoints * 4; | |
172 | 2 | complexVectorPtr = (int16_t*)(&complexVector[number]); | |
173 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
174 | 6 | *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar); | |
175 | 6 | *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar); | |
176 | } | ||
177 | 2 | } | |
178 | #endif /* LV_HAVE_SSE2 */ | ||
179 | |||
180 | |||
181 | #ifdef LV_HAVE_SSE | ||
182 | #include <xmmintrin.h> | ||
183 | |||
184 | 2 | static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector, | |
185 | const float* iBuffer, | ||
186 | const float* qBuffer, | ||
187 | const float scalar, | ||
188 | unsigned int num_points) | ||
189 | { | ||
190 | 2 | unsigned int number = 0; | |
191 | 2 | const float* iBufferPtr = iBuffer; | |
192 | 2 | const float* qBufferPtr = qBuffer; | |
193 | |||
194 | 2 | __m128 vScalar = _mm_set_ps1(scalar); | |
195 | |||
196 | 2 | const unsigned int quarterPoints = num_points / 4; | |
197 | |||
198 | __m128 iValue, qValue, cplxValue; | ||
199 | |||
200 | 2 | int16_t* complexVectorPtr = (int16_t*)complexVector; | |
201 | |||
202 | __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; | ||
203 | |||
204 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
205 | 65534 | iValue = _mm_load_ps(iBufferPtr); | |
206 | 65534 | qValue = _mm_load_ps(qBufferPtr); | |
207 | |||
208 | // Interleaves the lower two values in the i and q variables into one buffer | ||
209 | 65534 | cplxValue = _mm_unpacklo_ps(iValue, qValue); | |
210 | 65534 | cplxValue = _mm_mul_ps(cplxValue, vScalar); | |
211 | |||
212 | _mm_store_ps(floatBuffer, cplxValue); | ||
213 | |||
214 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]); | |
215 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]); | |
216 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]); | |
217 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]); | |
218 | |||
219 | // Interleaves the upper two values in the i and q variables into one buffer | ||
220 | 65534 | cplxValue = _mm_unpackhi_ps(iValue, qValue); | |
221 | 65534 | cplxValue = _mm_mul_ps(cplxValue, vScalar); | |
222 | |||
223 | _mm_store_ps(floatBuffer, cplxValue); | ||
224 | |||
225 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]); | |
226 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]); | |
227 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]); | |
228 | 65534 | *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]); | |
229 | |||
230 | 65534 | iBufferPtr += 4; | |
231 | 65534 | qBufferPtr += 4; | |
232 | } | ||
233 | |||
234 | 2 | number = quarterPoints * 4; | |
235 | 2 | complexVectorPtr = (int16_t*)(&complexVector[number]); | |
236 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
237 | 6 | *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar); | |
238 | 6 | *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar); | |
239 | } | ||
240 | 2 | } | |
241 | #endif /* LV_HAVE_SSE */ | ||
242 | |||
243 | |||
244 | #ifdef LV_HAVE_GENERIC | ||
245 | |||
246 | 2 | static inline void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t* complexVector, | |
247 | const float* iBuffer, | ||
248 | const float* qBuffer, | ||
249 | const float scalar, | ||
250 | unsigned int num_points) | ||
251 | { | ||
252 | 2 | int16_t* complexVectorPtr = (int16_t*)complexVector; | |
253 | 2 | const float* iBufferPtr = iBuffer; | |
254 | 2 | const float* qBufferPtr = qBuffer; | |
255 | 2 | unsigned int number = 0; | |
256 | |||
257 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
258 | 262142 | *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar); | |
259 | 262142 | *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar); | |
260 | } | ||
261 | 2 | } | |
262 | #endif /* LV_HAVE_GENERIC */ | ||
263 | |||
264 | |||
265 | #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */ | ||
266 | |||
267 | #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H | ||
268 | #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H | ||
269 | |||
270 | #include <inttypes.h> | ||
271 | #include <stdio.h> | ||
272 | #include <volk/volk_common.h> | ||
273 | |||
274 | #ifdef LV_HAVE_AVX2 | ||
275 | #include <immintrin.h> | ||
276 | |||
277 | 2 | static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(lv_16sc_t* complexVector, | |
278 | const float* iBuffer, | ||
279 | const float* qBuffer, | ||
280 | const float scalar, | ||
281 | unsigned int num_points) | ||
282 | { | ||
283 | 2 | unsigned int number = 0; | |
284 | 2 | const float* iBufferPtr = iBuffer; | |
285 | 2 | const float* qBufferPtr = qBuffer; | |
286 | |||
287 | 2 | __m256 vScalar = _mm256_set1_ps(scalar); | |
288 | |||
289 | 2 | const unsigned int eighthPoints = num_points / 8; | |
290 | |||
291 | __m256 iValue, qValue, cplxValue1, cplxValue2; | ||
292 | __m256i intValue1, intValue2; | ||
293 | |||
294 | 2 | int16_t* complexVectorPtr = (int16_t*)complexVector; | |
295 | |||
296 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < eighthPoints; number++) { |
297 | 32766 | iValue = _mm256_loadu_ps(iBufferPtr); | |
298 | 32766 | qValue = _mm256_loadu_ps(qBufferPtr); | |
299 | |||
300 | // Interleaves the lower two values in the i and q variables into one buffer | ||
301 | 32766 | cplxValue1 = _mm256_unpacklo_ps(iValue, qValue); | |
302 | 32766 | cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar); | |
303 | |||
304 | // Interleaves the upper two values in the i and q variables into one buffer | ||
305 | 32766 | cplxValue2 = _mm256_unpackhi_ps(iValue, qValue); | |
306 | 32766 | cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar); | |
307 | |||
308 | 32766 | intValue1 = _mm256_cvtps_epi32(cplxValue1); | |
309 | 32766 | intValue2 = _mm256_cvtps_epi32(cplxValue2); | |
310 | |||
311 | 32766 | intValue1 = _mm256_packs_epi32(intValue1, intValue2); | |
312 | |||
313 | _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1); | ||
314 | 32766 | complexVectorPtr += 16; | |
315 | |||
316 | 32766 | iBufferPtr += 8; | |
317 | 32766 | qBufferPtr += 8; | |
318 | } | ||
319 | |||
320 | 2 | number = eighthPoints * 8; | |
321 | 2 | complexVectorPtr = (int16_t*)(&complexVector[number]); | |
322 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
323 | 14 | *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar); | |
324 | 14 | *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar); | |
325 | } | ||
326 | 2 | } | |
327 | #endif /* LV_HAVE_AVX2 */ | ||
328 | |||
329 | |||
330 | #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H */ | ||
331 |