GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_s32f_normalize.h
Date: 2023-10-23 23:10:04
            Exec   Total   Coverage
Lines:        57      57     100.0%
Functions:     5       5     100.0%
Branches:     14      14     100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_s32f_normalize
12 *
13 * \b Overview
14 *
15 * Normalizes all points in the buffer by the scalar value (divides
16 * each data point by the scalar value).
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32f_s32f_normalize(float* vecBuffer, const float scalar, unsigned int num_points)
21 * \endcode
22 *
23 * \b Inputs
24 * \li vecBuffer: The buffer of values to be normalized.
25 * \li scalar: The value each buffer entry is divided by.
26 * \li num_points: The number of data points.
27 *
28 * \b Outputs
29 * \li vecBuffer: The normalized values, computed in place.
30 *
31 * \b Example
32 * \code
33 * int N = 10;
34 * unsigned int alignment = volk_get_alignment();
35 * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
36 * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
37 *
38 *
39 * for(unsigned int ii = 0; ii < N; ++ii){
40 * increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
41 * }
42 *
43 * // Divide each point by the scalar (here 5.0, the reciprocal of the 0.2 step between points)
44 * float scale = 5.0f;
45 *
46 * volk_32f_s32f_normalize(increasing, scale, N);
47 *
48 * for(unsigned int ii = 0; ii < N; ++ii){
49 * printf("increasing[%u] = %f\n", ii, increasing[ii]);
50 * }
51 *
52 * volk_free(increasing);
53 * volk_free(out);
54 * \endcode
55 */
56
57 #ifndef INCLUDED_volk_32f_s32f_normalize_a_H
58 #define INCLUDED_volk_32f_s32f_normalize_a_H
59
60 #include <inttypes.h>
61 #include <stdio.h>
62
63 #ifdef LV_HAVE_AVX
64 #include <immintrin.h>
65
66 2 static inline void volk_32f_s32f_normalize_a_avx(float* vecBuffer,
67 const float scalar,
68 unsigned int num_points)
69 {
70 2 unsigned int number = 0;
71 2 float* inputPtr = vecBuffer;
72
73 2 const float invScalar = 1.0 / scalar;
74 2 __m256 vecScalar = _mm256_set1_ps(invScalar);
75
76 __m256 input1;
77
78 2 const uint64_t eighthPoints = num_points / 8;
79 2/2 32768 for (; number < eighthPoints; number++) {
     ✓ Branch 0 taken 32766 times.
     ✓ Branch 1 taken 2 times.
80
81 32766 input1 = _mm256_load_ps(inputPtr);
82
83 32766 input1 = _mm256_mul_ps(input1, vecScalar);
84
85 _mm256_store_ps(inputPtr, input1);
86
87 32766 inputPtr += 8;
88 }
89
90 2 number = eighthPoints * 8;
91 2/2 16 for (; number < num_points; number++) {
     ✓ Branch 0 taken 14 times.
     ✓ Branch 1 taken 2 times.
92 14 *inputPtr *= invScalar;
93 14 inputPtr++;
94 }
95 2 }
96 #endif /* LV_HAVE_AVX */
97
98 #ifdef LV_HAVE_SSE
99 #include <xmmintrin.h>
100
101 2 static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer,
102 const float scalar,
103 unsigned int num_points)
104 {
105 2 unsigned int number = 0;
106 2 float* inputPtr = vecBuffer;
107
108 2 const float invScalar = 1.0 / scalar;
109 2 __m128 vecScalar = _mm_set_ps1(invScalar);
110
111 __m128 input1;
112
113 2 const uint64_t quarterPoints = num_points / 4;
114 2/2 65536 for (; number < quarterPoints; number++) {
     ✓ Branch 0 taken 65534 times.
     ✓ Branch 1 taken 2 times.
115
116 65534 input1 = _mm_load_ps(inputPtr);
117
118 65534 input1 = _mm_mul_ps(input1, vecScalar);
119
120 _mm_store_ps(inputPtr, input1);
121
122 65534 inputPtr += 4;
123 }
124
125 2 number = quarterPoints * 4;
126 2/2 8 for (; number < num_points; number++) {
     ✓ Branch 0 taken 6 times.
     ✓ Branch 1 taken 2 times.
127 6 *inputPtr *= invScalar;
128 6 inputPtr++;
129 }
130 2 }
131 #endif /* LV_HAVE_SSE */
132
133 #ifdef LV_HAVE_GENERIC
134
135 2 static inline void volk_32f_s32f_normalize_generic(float* vecBuffer,
136 const float scalar,
137 unsigned int num_points)
138 {
139 2 unsigned int number = 0;
140 2 float* inputPtr = vecBuffer;
141 2 const float invScalar = 1.0 / scalar;
142 2/2 262144 for (number = 0; number < num_points; number++) {
     ✓ Branch 0 taken 262142 times.
     ✓ Branch 1 taken 2 times.
143 262142 *inputPtr *= invScalar;
144 262142 inputPtr++;
145 }
146 2 }
147 #endif /* LV_HAVE_GENERIC */
148
149 #ifdef LV_HAVE_ORC
150
151 extern void volk_32f_s32f_normalize_a_orc_impl(float* dst,
152 float* src,
153 const float scalar,
154 unsigned int num_points);
155 2 static inline void volk_32f_s32f_normalize_u_orc(float* vecBuffer,
156 const float scalar,
157 unsigned int num_points)
158 {
159 2 float invscalar = 1.0 / scalar;
160 2 volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, num_points);
161 2 }
162 #endif /* LV_HAVE_ORC */
163
164 #endif /* INCLUDED_volk_32f_s32f_normalize_a_H */
165
166 #ifndef INCLUDED_volk_32f_s32f_normalize_u_H
167 #define INCLUDED_volk_32f_s32f_normalize_u_H
168
169 #include <inttypes.h>
170 #include <stdio.h>
171 #ifdef LV_HAVE_AVX
172 #include <immintrin.h>
173
174 2 static inline void volk_32f_s32f_normalize_u_avx(float* vecBuffer,
175 const float scalar,
176 unsigned int num_points)
177 {
178 2 unsigned int number = 0;
179 2 float* inputPtr = vecBuffer;
180
181 2 const float invScalar = 1.0 / scalar;
182 2 __m256 vecScalar = _mm256_set1_ps(invScalar);
183
184 __m256 input1;
185
186 2 const uint64_t eighthPoints = num_points / 8;
187 2/2 32768 for (; number < eighthPoints; number++) {
     ✓ Branch 0 taken 32766 times.
     ✓ Branch 1 taken 2 times.
188
189 32766 input1 = _mm256_loadu_ps(inputPtr);
190
191 32766 input1 = _mm256_mul_ps(input1, vecScalar);
192
193 _mm256_storeu_ps(inputPtr, input1);
194
195 32766 inputPtr += 8;
196 }
197
198 2 number = eighthPoints * 8;
199 2/2 16 for (; number < num_points; number++) {
     ✓ Branch 0 taken 14 times.
     ✓ Branch 1 taken 2 times.
200 14 *inputPtr *= invScalar;
201 14 inputPtr++;
202 }
203 2 }
204 #endif /* LV_HAVE_AVX */
205
206
207 #endif /* INCLUDED_volk_32f_s32f_normalize_u_H */
208
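Note on the counts above: every loop's "Branch 1 taken 2 times" indicates each kernel was entered twice, and the iteration counts are consistent with 131071-point buffers per call (16383 full AVX iterations plus a 7-element tail; 32767 full SSE iterations plus a 3-element tail). The harness below is a minimal sketch of such a run, using only the public calls already shown in the kernel's own example (volk_get_alignment, volk_malloc, volk_32f_s32f_normalize, volk_free); the buffer contents, tolerance, and pass/fail reporting are illustrative assumptions, not the actual VOLK QA test.

/* Illustrative coverage-style harness (not the VOLK QA suite). */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <volk/volk.h>

int main(void)
{
    const unsigned int N = 131071; /* not a multiple of 8, so the scalar tail loops also run */
    const float scalar = 5.0f;
    const float tol = 1e-5f;       /* assumed relative tolerance for the check below */

    size_t alignment = volk_get_alignment();
    float* buf = (float*)volk_malloc(sizeof(float) * N, alignment);
    float* ref = (float*)malloc(sizeof(float) * N);
    if (buf == NULL || ref == NULL) {
        return EXIT_FAILURE;
    }

    for (unsigned int i = 0; i < N; i++) {
        buf[i] = (float)i - (float)N / 2.0f;
        ref[i] = buf[i] / scalar; /* reference: divide each point by the scalar */
    }

    /* In-place normalization; the dispatcher picks an AVX, SSE or generic kernel at run time. */
    volk_32f_s32f_normalize(buf, scalar, N);

    unsigned int mismatches = 0;
    for (unsigned int i = 0; i < N; i++) {
        if (fabsf(buf[i] - ref[i]) > tol * (1.0f + fabsf(ref[i]))) {
            mismatches++;
        }
    }
    printf("%u mismatches out of %u points\n", mismatches, N);

    volk_free(buf);
    free(ref);
    return mismatches ? EXIT_FAILURE : EXIT_SUCCESS;
}

Building a program like this with GCC's --coverage option (or -fprofile-arcs -ftest-coverage), running it twice, and then invoking gcov or gcovr on the resulting data would yield per-line and per-branch counts in the style of the report above.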