Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_32fc_conjugate_32fc | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Takes the conjugate of a complex vector. | ||
16 | * | ||
17 | * <b>Dispatcher Prototype</b> | ||
18 | * \code | ||
19 | * void volk_32fc_conjugate_32fc(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned | ||
20 | * int num_points) \endcode | ||
21 | * | ||
22 | * \b Inputs | ||
23 | * \li aVector: The input vector of complex floats. | ||
24 | * \li num_points: The number of data points. | ||
25 | * | ||
26 | * \b Outputs | ||
27 | * \li cVector: The output vector of complex floats. | ||
28 | * | ||
29 | * \b Example | ||
30 | * Generate points around the top half of the unit circle and conjugate them | ||
31 | * to give the bottom half of the unit circle. | ||
32 | * \code | ||
33 | * int N = 10; | ||
34 | * unsigned int alignment = volk_get_alignment(); | ||
35 | * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment); | ||
36 | * lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment); | ||
37 | * | ||
38 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
39 | * float real = 2.f * ((float)ii / (float)N) - 1.f; | ||
40 | * float imag = std::sqrt(1.f - real * real); | ||
41 | * in[ii] = lv_cmake(real, imag); | ||
42 | * } | ||
43 | * | ||
44 | * volk_32fc_conjugate_32fc(out, in, N); | ||
45 | * | ||
46 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
47 | * printf("out(%i) = %.1f + %.1fi\n", ii, lv_creal(out[ii]), lv_cimag(out[ii])); | ||
48 | * } | ||
49 | * | ||
50 | * volk_free(in); | ||
51 | * volk_free(out); | ||
52 | * \endcode | ||
53 | */ | ||
54 | |||
55 | #ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H | ||
56 | #define INCLUDED_volk_32fc_conjugate_32fc_u_H | ||
57 | |||
58 | #include <float.h> | ||
59 | #include <inttypes.h> | ||
60 | #include <stdio.h> | ||
61 | #include <volk/volk_complex.h> | ||
62 | |||
63 | #ifdef LV_HAVE_AVX | ||
64 | #include <immintrin.h> | ||
65 | |||
66 | 2 | static inline void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t* cVector, | |
67 | const lv_32fc_t* aVector, | ||
68 | unsigned int num_points) | ||
69 | { | ||
70 | 2 | unsigned int number = 0; | |
71 | 2 | const unsigned int quarterPoints = num_points / 4; | |
72 | |||
73 | __m256 x; | ||
74 | 2 | lv_32fc_t* c = cVector; | |
75 | 2 | const lv_32fc_t* a = aVector; | |
76 | |||
77 | 2 | __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f); | |
78 | |||
79 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
80 | |||
81 | 65534 | x = _mm256_loadu_ps((float*)a); // Load four complex values as ar,ai,br,bi,cr,ci,dr,di | |
82 | |||
83 | 65534 | x = _mm256_xor_ps(x, conjugator); // conjugate register | |
84 | |||
85 | _mm256_storeu_ps((float*)c, x); // Store the results back into the C container | ||
86 | |||
87 | 65534 | a += 4; | |
88 | 65534 | c += 4; | |
89 | } | ||
90 | |||
91 | 2 | number = quarterPoints * 4; | |
92 | |||
93 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
94 | 6 | *c++ = lv_conj(*a++); | |
95 | } | ||
96 | 2 | } | |
97 | #endif /* LV_HAVE_AVX */ | ||
98 | |||
99 | #ifdef LV_HAVE_SSE3 | ||
100 | #include <pmmintrin.h> | ||
101 | |||
102 | 2 | static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, | |
103 | const lv_32fc_t* aVector, | ||
104 | unsigned int num_points) | ||
105 | { | ||
106 | 2 | unsigned int number = 0; | |
107 | 2 | const unsigned int halfPoints = num_points / 2; | |
108 | |||
109 | __m128 x; | ||
110 | 2 | lv_32fc_t* c = cVector; | |
111 | 2 | const lv_32fc_t* a = aVector; | |
112 | |||
113 | 2 | __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); | |
114 | |||
115 |
2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
|
131072 | for (; number < halfPoints; number++) { |
116 | |||
117 | 131070 | x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi | |
118 | |||
119 | 131070 | x = _mm_xor_ps(x, conjugator); // conjugate register | |
120 | |||
121 | _mm_storeu_ps((float*)c, x); // Store the results back into the C container | ||
122 | |||
123 | 131070 | a += 2; | |
124 | 131070 | c += 2; | |
125 | } | ||
126 | |||
127 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if ((num_points % 2) != 0) { |
128 | 2 | *c = lv_conj(*a); | |
129 | } | ||
130 | 2 | } | |
131 | #endif /* LV_HAVE_SSE3 */ | ||
132 | |||
133 | #ifdef LV_HAVE_GENERIC | ||
134 | |||
135 | 2 | static inline void volk_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, | |
136 | const lv_32fc_t* aVector, | ||
137 | unsigned int num_points) | ||
138 | { | ||
139 | 2 | lv_32fc_t* cPtr = cVector; | |
140 | 2 | const lv_32fc_t* aPtr = aVector; | |
141 | 2 | unsigned int number = 0; | |
142 | |||
143 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
144 | 262142 | *cPtr++ = lv_conj(*aPtr++); | |
145 | } | ||
146 | 2 | } | |
147 | #endif /* LV_HAVE_GENERIC */ | ||
148 | |||
149 | |||
150 | #endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */ | ||
151 | #ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H | ||
152 | #define INCLUDED_volk_32fc_conjugate_32fc_a_H | ||
153 | |||
154 | #include <float.h> | ||
155 | #include <inttypes.h> | ||
156 | #include <stdio.h> | ||
157 | #include <volk/volk_complex.h> | ||
158 | |||
159 | #ifdef LV_HAVE_AVX | ||
160 | #include <immintrin.h> | ||
161 | |||
162 | 2 | static inline void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t* cVector, | |
163 | const lv_32fc_t* aVector, | ||
164 | unsigned int num_points) | ||
165 | { | ||
166 | 2 | unsigned int number = 0; | |
167 | 2 | const unsigned int quarterPoints = num_points / 4; | |
168 | |||
169 | __m256 x; | ||
170 | 2 | lv_32fc_t* c = cVector; | |
171 | 2 | const lv_32fc_t* a = aVector; | |
172 | |||
173 | 2 | __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f); | |
174 | |||
175 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
176 | |||
177 | 65534 | x = _mm256_load_ps((float*)a); // Load four complex values as ar,ai,br,bi,cr,ci,dr,di | |
178 | |||
179 | 65534 | x = _mm256_xor_ps(x, conjugator); // conjugate register | |
180 | |||
181 | _mm256_store_ps((float*)c, x); // Store the results back into the C container | ||
182 | |||
183 | 65534 | a += 4; | |
184 | 65534 | c += 4; | |
185 | } | ||
186 | |||
187 | 2 | number = quarterPoints * 4; | |
188 | |||
189 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
190 | 6 | *c++ = lv_conj(*a++); | |
191 | } | ||
192 | 2 | } | |
193 | #endif /* LV_HAVE_AVX */ | ||
194 | |||
195 | #ifdef LV_HAVE_SSE3 | ||
196 | #include <pmmintrin.h> | ||
197 | |||
198 | 2 | static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, | |
199 | const lv_32fc_t* aVector, | ||
200 | unsigned int num_points) | ||
201 | { | ||
202 | 2 | unsigned int number = 0; | |
203 | 2 | const unsigned int halfPoints = num_points / 2; | |
204 | |||
205 | __m128 x; | ||
206 | 2 | lv_32fc_t* c = cVector; | |
207 | 2 | const lv_32fc_t* a = aVector; | |
208 | |||
209 | 2 | __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); | |
210 | |||
211 |
2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
|
131072 | for (; number < halfPoints; number++) { |
212 | |||
213 | 131070 | x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi | |
214 | |||
215 | 131070 | x = _mm_xor_ps(x, conjugator); // conjugate register | |
216 | |||
217 | _mm_store_ps((float*)c, x); // Store the results back into the C container | ||
218 | |||
219 | 131070 | a += 2; | |
220 | 131070 | c += 2; | |
221 | } | ||
222 | |||
223 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if ((num_points % 2) != 0) { |
224 | 2 | *c = lv_conj(*a); | |
225 | } | ||
226 | 2 | } | |
227 | #endif /* LV_HAVE_SSE3 */ | ||
228 | |||
229 | #ifdef LV_HAVE_NEON | ||
230 | #include <arm_neon.h> | ||
231 | |||
232 | static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector, | ||
233 | const lv_32fc_t* aVector, | ||
234 | unsigned int num_points) | ||
235 | { | ||
236 | unsigned int number; | ||
237 | const unsigned int quarterPoints = num_points / 4; | ||
238 | |||
239 | float32x4x2_t x; | ||
240 | lv_32fc_t* c = cVector; | ||
241 | const lv_32fc_t* a = aVector; | ||
242 | |||
243 | for (number = 0; number < quarterPoints; number++) { | ||
244 | __VOLK_PREFETCH(a + 4); | ||
245 | x = vld2q_f32((float*)a); // Load the complex data as ar,br,cr,dr; ai,bi,ci,di | ||
246 | |||
247 | // negate the imaginary lane (ai,bi,ci,di) to form the conjugate | ||
248 | x.val[1] = vnegq_f32(x.val[1]); | ||
249 | |||
250 | vst2q_f32((float*)c, x); // Store the results back into the C container | ||
251 | |||
252 | a += 4; | ||
253 | c += 4; | ||
254 | } | ||
255 | |||
256 | for (number = quarterPoints * 4; number < num_points; number++) { | ||
257 | *c++ = lv_conj(*a++); | ||
258 | } | ||
259 | } | ||
260 | #endif /* LV_HAVE_NEON */ | ||
261 | |||
262 | |||
263 | #ifdef LV_HAVE_GENERIC | ||
264 | |||
265 | 2 | static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, | |
266 | const lv_32fc_t* aVector, | ||
267 | unsigned int num_points) | ||
268 | { | ||
269 | 2 | lv_32fc_t* cPtr = cVector; | |
270 | 2 | const lv_32fc_t* aPtr = aVector; | |
271 | 2 | unsigned int number = 0; | |
272 | |||
273 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
274 | 262142 | *cPtr++ = lv_conj(*aPtr++); | |
275 | } | ||
276 | 2 | } | |
277 | #endif /* LV_HAVE_GENERIC */ | ||
278 | |||
279 | |||
280 | #endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */ | ||
281 |