GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_binary_slicer_32i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 90 90 100.0%
Functions: 6 6 100.0%
Branches: 30 30 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_binary_slicer_32i
12 *
13 * \b Overview
14 *
15 * Slices input floats and returns 1 when the input >= 0 and 0
16 * when < 0. Results are returned as 32-bit ints.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32f_binary_slicer_32i(int* cVector, const float* aVector, unsigned int
21 * num_points) \endcode
22 *
23 * \b Inputs
24 * \li aVector: The input vector of floats.
25 * \li num_points: The number of data points.
26 *
27 * \b Outputs
28 * \li cVector: The output vector of 32-bit ints.
29 *
30 * \b Example
31 * Generate ints of a 7-bit barker code from floats.
32 * \code
33 * int N = 7;
34 * unsigned int alignment = volk_get_alignment();
35 * float* in = (float*)volk_malloc(sizeof(float)*N, alignment);
36 * int32_t* out = (int32_t*)volk_malloc(sizeof(int32_t)*N, alignment);
37 *
38 * in[0] = 0.9f;
39 * in[1] = 1.1f;
40 * in[2] = 0.4f;
41 * in[3] = -0.7f;
42 * in[4] = -1.2f;
43 * in[5] = 0.2f;
44 * in[6] = -0.8f;
45 *
46 * volk_32f_binary_slicer_32i(out, in, N);
47 *
48 * for(unsigned int ii = 0; ii < N; ++ii){
49 * printf("out(%i) = %i\n", ii, out[ii]);
50 * }
51 *
52 * volk_free(in);
53 * volk_free(out);
54 * \endcode
55 */
56
57 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H
58 #define INCLUDED_volk_32f_binary_slicer_32i_H
59
60
61 #ifdef LV_HAVE_GENERIC
62
63 2 static inline void volk_32f_binary_slicer_32i_generic(int* cVector,
64 const float* aVector,
65 unsigned int num_points)
66 {
67 2 int* cPtr = cVector;
68 2 const float* aPtr = aVector;
69 2 unsigned int number = 0;
70
71
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
72
2/2
✓ Branch 0 taken 130341 times.
✓ Branch 1 taken 131801 times.
262142 if (*aPtr++ >= 0) {
73 130341 *cPtr++ = 1;
74 } else {
75 131801 *cPtr++ = 0;
76 }
77 }
78 2 }
79 #endif /* LV_HAVE_GENERIC */
80
81
82 #ifdef LV_HAVE_GENERIC
83
84 2 static inline void volk_32f_binary_slicer_32i_generic_branchless(int* cVector,
85 const float* aVector,
86 unsigned int num_points)
87 {
88 2 int* cPtr = cVector;
89 2 const float* aPtr = aVector;
90 2 unsigned int number = 0;
91
92
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
93 262142 *cPtr++ = (*aPtr++ >= 0);
94 }
95 2 }
96 #endif /* LV_HAVE_GENERIC */
97
98
99 #ifdef LV_HAVE_SSE2
100 #include <emmintrin.h>
101
102 2 static inline void volk_32f_binary_slicer_32i_a_sse2(int* cVector,
103 const float* aVector,
104 unsigned int num_points)
105 {
106 2 int* cPtr = cVector;
107 2 const float* aPtr = aVector;
108 2 unsigned int number = 0;
109
110 2 unsigned int quarter_points = num_points / 4;
111 __m128 a_val, res_f;
112 __m128i res_i, binary_i;
113 __m128 zero_val;
114 2 zero_val = _mm_set1_ps(0.0f);
115
116
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (number = 0; number < quarter_points; number++) {
117 65534 a_val = _mm_load_ps(aPtr);
118
119 65534 res_f = _mm_cmpge_ps(a_val, zero_val);
120 65534 res_i = _mm_cvtps_epi32(res_f);
121 65534 binary_i = _mm_srli_epi32(res_i, 31);
122
123 _mm_store_si128((__m128i*)cPtr, binary_i);
124
125 65534 cPtr += 4;
126 65534 aPtr += 4;
127 }
128
129
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (number = quarter_points * 4; number < num_points; number++) {
130
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (*aPtr++ >= 0) {
131 4 *cPtr++ = 1;
132 } else {
133 2 *cPtr++ = 0;
134 }
135 }
136 2 }
137 #endif /* LV_HAVE_SSE2 */
138
139
140 #ifdef LV_HAVE_AVX
141 #include <immintrin.h>
142
143 2 static inline void volk_32f_binary_slicer_32i_a_avx(int* cVector,
144 const float* aVector,
145 unsigned int num_points)
146 {
147 2 int* cPtr = cVector;
148 2 const float* aPtr = aVector;
149 2 unsigned int number = 0;
150
151 2 unsigned int quarter_points = num_points / 8;
152 __m256 a_val, res_f, binary_f;
153 __m256i binary_i;
154 __m256 zero_val, one_val;
155 2 zero_val = _mm256_set1_ps(0.0f);
156 2 one_val = _mm256_set1_ps(1.0f);
157
158
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < quarter_points; number++) {
159 32766 a_val = _mm256_load_ps(aPtr);
160
161 32766 res_f = _mm256_cmp_ps(a_val, zero_val, _CMP_GE_OS);
162 32766 binary_f = _mm256_and_ps(res_f, one_val);
163 32766 binary_i = _mm256_cvtps_epi32(binary_f);
164
165 _mm256_store_si256((__m256i*)cPtr, binary_i);
166
167 32766 cPtr += 8;
168 32766 aPtr += 8;
169 }
170
171
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (number = quarter_points * 8; number < num_points; number++) {
172
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 6 times.
14 if (*aPtr++ >= 0) {
173 8 *cPtr++ = 1;
174 } else {
175 6 *cPtr++ = 0;
176 }
177 }
178 2 }
179 #endif /* LV_HAVE_AVX */
180
181
182 #ifdef LV_HAVE_SSE2
183 #include <emmintrin.h>
184
185 2 static inline void volk_32f_binary_slicer_32i_u_sse2(int* cVector,
186 const float* aVector,
187 unsigned int num_points)
188 {
189 2 int* cPtr = cVector;
190 2 const float* aPtr = aVector;
191 2 unsigned int number = 0;
192
193 2 unsigned int quarter_points = num_points / 4;
194 __m128 a_val, res_f;
195 __m128i res_i, binary_i;
196 __m128 zero_val;
197 2 zero_val = _mm_set1_ps(0.0f);
198
199
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (number = 0; number < quarter_points; number++) {
200 65534 a_val = _mm_loadu_ps(aPtr);
201
202 65534 res_f = _mm_cmpge_ps(a_val, zero_val);
203 65534 res_i = _mm_cvtps_epi32(res_f);
204 65534 binary_i = _mm_srli_epi32(res_i, 31);
205
206 _mm_storeu_si128((__m128i*)cPtr, binary_i);
207
208 65534 cPtr += 4;
209 65534 aPtr += 4;
210 }
211
212
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (number = quarter_points * 4; number < num_points; number++) {
213
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (*aPtr++ >= 0) {
214 4 *cPtr++ = 1;
215 } else {
216 2 *cPtr++ = 0;
217 }
218 }
219 2 }
220 #endif /* LV_HAVE_SSE2 */
221
222
223 #ifdef LV_HAVE_AVX
224 #include <immintrin.h>
225
226 2 static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector,
227 const float* aVector,
228 unsigned int num_points)
229 {
230 2 int* cPtr = cVector;
231 2 const float* aPtr = aVector;
232 2 unsigned int number = 0;
233
234 2 unsigned int quarter_points = num_points / 8;
235 __m256 a_val, res_f, binary_f;
236 __m256i binary_i;
237 __m256 zero_val, one_val;
238 2 zero_val = _mm256_set1_ps(0.0f);
239 2 one_val = _mm256_set1_ps(1.0f);
240
241
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < quarter_points; number++) {
242 32766 a_val = _mm256_loadu_ps(aPtr);
243
244 32766 res_f = _mm256_cmp_ps(a_val, zero_val, _CMP_GE_OS);
245 32766 binary_f = _mm256_and_ps(res_f, one_val);
246 32766 binary_i = _mm256_cvtps_epi32(binary_f);
247
248 _mm256_storeu_si256((__m256i*)cPtr, binary_i);
249
250 32766 cPtr += 8;
251 32766 aPtr += 8;
252 }
253
254
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (number = quarter_points * 8; number < num_points; number++) {
255
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 6 times.
14 if (*aPtr++ >= 0) {
256 8 *cPtr++ = 1;
257 } else {
258 6 *cPtr++ = 0;
259 }
260 }
261 2 }
262 #endif /* LV_HAVE_AVX */
263
264
265 #endif /* INCLUDED_volk_32f_binary_slicer_32i_H */
266