GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_s32f_power_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 21 21 100.0%
Functions: 3 3 100.0%
Branches: 6 6 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_s32f_power_32f
12 *
13 * \b Overview
14 *
15 * Takes each input vector value to the specified power and stores the
16 * results in the return vector.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32f_s32f_power_32f(float* cVector, const float* aVector, const float power,
21 * unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li aVector: The input vector of floats.
25 * \li power: The power to raise the input value to.
26 * \li num_points: The number of data points.
27 *
28 * \b Outputs
29 * \li cVector: The output vector.
30 *
31 * \b Example
32 * Square the integers 0 through 9
33 * \code
34 * int N = 10;
35 * unsigned int alignment = volk_get_alignment();
36 * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
37 * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
38 *
39 *
40 * for(unsigned int ii = 0; ii < N; ++ii){
41 * increasing[ii] = (float)ii;
42 * }
43 *
44 * // Raise every element to the power of 2, i.e. square it
45 * float power = 2.0f;
46 *
47 * volk_32f_s32f_power_32f(out, increasing, power, N);
48 *
49 * for(unsigned int ii = 0; ii < N; ++ii){
50 * printf("out[%u] = %f\n", ii, out[ii]);
51 * }
52 *
53 * volk_free(increasing);
54 * volk_free(out);
55 * \endcode
56 */
57
58 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
59 #define INCLUDED_volk_32f_s32f_power_32f_a_H
60
61 #include <inttypes.h>
62 #include <math.h>
63 #include <stdio.h>
64
65 #ifdef LV_HAVE_SSE4_1
66 #include <tmmintrin.h>
67
68 #ifdef LV_HAVE_LIB_SIMDMATH
69 #include <simdmath.h>
70 #endif /* LV_HAVE_LIB_SIMDMATH */
71
72 2 static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector,
73 const float* aVector,
74 const float power,
75 unsigned int num_points)
76 {
77 2 unsigned int number = 0;
78
79 2 float* cPtr = cVector;
80 2 const float* aPtr = aVector;
81
82 #ifdef LV_HAVE_LIB_SIMDMATH
83 const unsigned int quarterPoints = num_points / 4;
84 __m128 vPower = _mm_set_ps1(power);
85 __m128 zeroValue = _mm_setzero_ps();
86 __m128 signMask;
87 __m128 negatedValues;
88 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
89 __m128 onesMask = _mm_set_ps1(1);
90
91 __m128 aVal, cVal;
92 for (; number < quarterPoints; number++) {
93
94 aVal = _mm_load_ps(aPtr);
95 signMask = _mm_cmplt_ps(aVal, zeroValue);
96 negatedValues = _mm_sub_ps(zeroValue, aVal);
97 aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
98
99 // powf4 doesn't support negative values in the base, so we mask them off and then
100 // apply the negative after
101 cVal = powf4(aVal, vPower); // Takes each input value to the specified power
102
103 cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
104
105 _mm_store_ps(cPtr, cVal); // Store the results back into the C container
106
107 aPtr += 4;
108 cPtr += 4;
109 }
110
111 number = quarterPoints * 4;
112 #endif /* LV_HAVE_LIB_SIMDMATH */
113
114
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (; number < num_points; number++) {
115 262142 *cPtr++ = powf((*aPtr++), power);
116 }
117 2 }
118
119 #endif /* LV_HAVE_SSE4_1 */
120
121
122 #ifdef LV_HAVE_SSE
123 #include <xmmintrin.h>
124
125 #ifdef LV_HAVE_LIB_SIMDMATH
126 #include <simdmath.h>
127 #endif /* LV_HAVE_LIB_SIMDMATH */
128
129 2 static inline void volk_32f_s32f_power_32f_a_sse(float* cVector,
130 const float* aVector,
131 const float power,
132 unsigned int num_points)
133 {
134 2 unsigned int number = 0;
135
136 2 float* cPtr = cVector;
137 2 const float* aPtr = aVector;
138
139 #ifdef LV_HAVE_LIB_SIMDMATH
140 const unsigned int quarterPoints = num_points / 4;
141 __m128 vPower = _mm_set_ps1(power);
142 __m128 zeroValue = _mm_setzero_ps();
143 __m128 signMask;
144 __m128 negatedValues;
145 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
146 __m128 onesMask = _mm_set_ps1(1);
147
148 __m128 aVal, cVal;
149 for (; number < quarterPoints; number++) {
150
151 aVal = _mm_load_ps(aPtr);
152 signMask = _mm_cmplt_ps(aVal, zeroValue);
153 negatedValues = _mm_sub_ps(zeroValue, aVal);
154 aVal =
155 _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues));
156
157 // powf4 doesn't support negative values in the base, so we mask them off and then
158 // apply the negative after
159 cVal = powf4(aVal, vPower); // Takes each input value to the specified power
160
161 cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask),
162 _mm_and_ps(signMask, negativeOneToPower)),
163 cVal);
164
165 _mm_store_ps(cPtr, cVal); // Store the results back into the C container
166
167 aPtr += 4;
168 cPtr += 4;
169 }
170
171 number = quarterPoints * 4;
172 #endif /* LV_HAVE_LIB_SIMDMATH */
173
174
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (; number < num_points; number++) {
175 262142 *cPtr++ = powf((*aPtr++), power);
176 }
177 2 }
178
179 #endif /* LV_HAVE_SSE */
180
181
182 #ifdef LV_HAVE_GENERIC
183
184 2 static inline void volk_32f_s32f_power_32f_generic(float* cVector,
185 const float* aVector,
186 const float power,
187 unsigned int num_points)
188 {
189 2 float* cPtr = cVector;
190 2 const float* aPtr = aVector;
191 2 unsigned int number = 0;
192
193
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
194 262142 *cPtr++ = powf((*aPtr++), power);
195 }
196 2 }
197 #endif /* LV_HAVE_GENERIC */
198
199
200 #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */
201