GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_conjugate_32fc.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 72 72 100.0%
Functions: 6 6 100.0%
Branches: 18 20 90.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_conjugate_32fc
12 *
13 * \b Overview
14 *
15 * Takes the conjugate of a complex vector.
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_32fc_conjugate_32fc(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned
20 * int num_points) \endcode
21 *
22 * \b Inputs
23 * \li aVector: The input vector of complex floats.
24 * \li num_points: The number of data points.
25 *
26 * \b Outputs
27 * \li cVector: The output vector of complex floats.
28 *
29 * \b Example
30 * Generate points around the top half of the unit circle and conjugate them
31 * to give the bottom half of the unit circle.
32 * \code
33 * int N = 10;
34 * unsigned int alignment = volk_get_alignment();
35 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
36 * lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
37 *
38 * for(unsigned int ii = 0; ii < N; ++ii){
39 * float real = 2.f * ((float)ii / (float)N) - 1.f;
40 * float imag = std::sqrt(1.f - real * real);
41 * in[ii] = lv_cmake(real, imag);
42 * }
43 *
44 * volk_32fc_conjugate_32fc(out, in, N);
45 *
46 * for(unsigned int ii = 0; ii < N; ++ii){
47 * printf("out(%i) = %.1f + %.1fi\n", ii, lv_creal(out[ii]), lv_cimag(out[ii]));
48 * }
49 *
50 * volk_free(in);
51 * volk_free(out);
52 * \endcode
53 */
54
55 #ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
56 #define INCLUDED_volk_32fc_conjugate_32fc_u_H
57
58 #include <float.h>
59 #include <inttypes.h>
60 #include <stdio.h>
61 #include <volk/volk_complex.h>
62
63 #ifdef LV_HAVE_AVX
64 #include <immintrin.h>
65
66 2 static inline void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t* cVector,
67 const lv_32fc_t* aVector,
68 unsigned int num_points)
69 {
70 2 unsigned int number = 0;
71 2 const unsigned int quarterPoints = num_points / 4;
72
73 __m256 x;
74 2 lv_32fc_t* c = cVector;
75 2 const lv_32fc_t* a = aVector;
76
77 2 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
78
79
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
80
81 65534 x = _mm256_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
82
83 65534 x = _mm256_xor_ps(x, conjugator); // conjugate register
84
85 _mm256_storeu_ps((float*)c, x); // Store the results back into the C container
86
87 65534 a += 4;
88 65534 c += 4;
89 }
90
91 2 number = quarterPoints * 4;
92
93
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
94 6 *c++ = lv_conj(*a++);
95 }
96 2 }
97 #endif /* LV_HAVE_AVX */
98
99 #ifdef LV_HAVE_SSE3
100 #include <pmmintrin.h>
101
102 2 static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector,
103 const lv_32fc_t* aVector,
104 unsigned int num_points)
105 {
106 2 unsigned int number = 0;
107 2 const unsigned int halfPoints = num_points / 2;
108
109 __m128 x;
110 2 lv_32fc_t* c = cVector;
111 2 const lv_32fc_t* a = aVector;
112
113 2 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
114
115
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < halfPoints; number++) {
116
117 131070 x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
118
119 131070 x = _mm_xor_ps(x, conjugator); // conjugate register
120
121 _mm_storeu_ps((float*)c, x); // Store the results back into the C container
122
123 131070 a += 2;
124 131070 c += 2;
125 }
126
127
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if ((num_points % 2) != 0) {
128 2 *c = lv_conj(*a);
129 }
130 2 }
131 #endif /* LV_HAVE_SSE3 */
132
133 #ifdef LV_HAVE_GENERIC
134
135 2 static inline void volk_32fc_conjugate_32fc_generic(lv_32fc_t* cVector,
136 const lv_32fc_t* aVector,
137 unsigned int num_points)
138 {
139 2 lv_32fc_t* cPtr = cVector;
140 2 const lv_32fc_t* aPtr = aVector;
141 2 unsigned int number = 0;
142
143
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
144 262142 *cPtr++ = lv_conj(*aPtr++);
145 }
146 2 }
147 #endif /* LV_HAVE_GENERIC */
148
149
150 #endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
151 #ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
152 #define INCLUDED_volk_32fc_conjugate_32fc_a_H
153
154 #include <float.h>
155 #include <inttypes.h>
156 #include <stdio.h>
157 #include <volk/volk_complex.h>
158
159 #ifdef LV_HAVE_AVX
160 #include <immintrin.h>
161
162 2 static inline void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t* cVector,
163 const lv_32fc_t* aVector,
164 unsigned int num_points)
165 {
166 2 unsigned int number = 0;
167 2 const unsigned int quarterPoints = num_points / 4;
168
169 __m256 x;
170 2 lv_32fc_t* c = cVector;
171 2 const lv_32fc_t* a = aVector;
172
173 2 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
174
175
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
176
177 65534 x = _mm256_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
178
179 65534 x = _mm256_xor_ps(x, conjugator); // conjugate register
180
181 _mm256_store_ps((float*)c, x); // Store the results back into the C container
182
183 65534 a += 4;
184 65534 c += 4;
185 }
186
187 2 number = quarterPoints * 4;
188
189
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
190 6 *c++ = lv_conj(*a++);
191 }
192 2 }
193 #endif /* LV_HAVE_AVX */
194
195 #ifdef LV_HAVE_SSE3
196 #include <pmmintrin.h>
197
198 2 static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector,
199 const lv_32fc_t* aVector,
200 unsigned int num_points)
201 {
202 2 unsigned int number = 0;
203 2 const unsigned int halfPoints = num_points / 2;
204
205 __m128 x;
206 2 lv_32fc_t* c = cVector;
207 2 const lv_32fc_t* a = aVector;
208
209 2 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
210
211
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < halfPoints; number++) {
212
213 131070 x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
214
215 131070 x = _mm_xor_ps(x, conjugator); // conjugate register
216
217 _mm_store_ps((float*)c, x); // Store the results back into the C container
218
219 131070 a += 2;
220 131070 c += 2;
221 }
222
223
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if ((num_points % 2) != 0) {
224 2 *c = lv_conj(*a);
225 }
226 2 }
227 #endif /* LV_HAVE_SSE3 */
228
229 #ifdef LV_HAVE_NEON
230 #include <arm_neon.h>
231
232 static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector,
233 const lv_32fc_t* aVector,
234 unsigned int num_points)
235 {
236 unsigned int number;
237 const unsigned int quarterPoints = num_points / 4;
238
239 float32x4x2_t x;
240 lv_32fc_t* c = cVector;
241 const lv_32fc_t* a = aVector;
242
243 for (number = 0; number < quarterPoints; number++) {
244 __VOLK_PREFETCH(a + 4);
245 x = vld2q_f32((float*)a); // Load the complex data as ar,br,cr,dr; ai,bi,ci,di
246
247 // negate the imaginary lane
248 x.val[1] = vnegq_f32(x.val[1]);
249
250 vst2q_f32((float*)c, x); // Store the results back into the C container
251
252 a += 4;
253 c += 4;
254 }
255
256 for (number = quarterPoints * 4; number < num_points; number++) {
257 *c++ = lv_conj(*a++);
258 }
259 }
260 #endif /* LV_HAVE_NEON */
261
262
263 #ifdef LV_HAVE_GENERIC
264
265 2 static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector,
266 const lv_32fc_t* aVector,
267 unsigned int num_points)
268 {
269 2 lv_32fc_t* cPtr = cVector;
270 2 const lv_32fc_t* aPtr = aVector;
271 2 unsigned int number = 0;
272
273
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
274 262142 *cPtr++ = lv_conj(*aPtr++);
275 }
276 2 }
277 #endif /* LV_HAVE_GENERIC */
278
279
280 #endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
281