Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_32f_s32f_normalize | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Normalizes all points in the buffer by the scalar value (divides | ||
16 | * each data point by the scalar value). | ||
17 | * | ||
18 | * <b>Dispatcher Prototype</b> | ||
19 | * \code | ||
20 | * void volk_32f_s32f_normalize(float* vecBuffer, const float scalar, unsigned int | ||
21 | * num_points) \endcode | ||
22 | * | ||
23 | * \b Inputs | ||
24 | * \li vecBuffer: The buffer of values to be normalized. | ||
25 | * \li scalar: The value that each buffer element is divided by. | ||
26 | * \li num_points: The number of data points. | ||
27 | * | ||
28 | * \b Outputs | ||
29 | * \li vecBuffer: The normalized values, written back in place. | ||
30 | * | ||
31 | * \b Example | ||
32 | * \code | ||
33 | * int N = 10; | ||
34 | * unsigned int alignment = volk_get_alignment(); | ||
35 | * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment); | ||
36 | * float* out = (float*)volk_malloc(sizeof(float)*N, alignment); | ||
37 | * | ||
38 | * | ||
39 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
40 | * increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f; | ||
41 | * } | ||
42 | * | ||
43 | * // Divide every point by 5.0 (the step between adjacent points is 0.2) | ||
44 | * float scale = 5.0f; | ||
45 | * | ||
46 | * volk_32f_s32f_normalize(increasing, scale, N); | ||
47 | * | ||
48 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
49 | * printf("increasing[%u] = %f\n", ii, increasing[ii]); | ||
50 | * } | ||
51 | * | ||
52 | * volk_free(increasing); | ||
53 | * volk_free(out); | ||
54 | * \endcode | ||
55 | */ | ||
56 | |||
57 | #ifndef INCLUDED_volk_32f_s32f_normalize_a_H | ||
58 | #define INCLUDED_volk_32f_s32f_normalize_a_H | ||
59 | |||
60 | #include <inttypes.h> | ||
61 | #include <stdio.h> | ||
62 | |||
63 | #ifdef LV_HAVE_AVX | ||
64 | #include <immintrin.h> | ||
65 | |||
66 | 2 | static inline void volk_32f_s32f_normalize_a_avx(float* vecBuffer, | |
67 | const float scalar, | ||
68 | unsigned int num_points) | ||
69 | { | ||
70 | 2 | unsigned int number = 0; | |
71 | 2 | float* inputPtr = vecBuffer; | |
72 | |||
73 | 2 | const float invScalar = 1.0 / scalar; | |
74 | 2 | __m256 vecScalar = _mm256_set1_ps(invScalar); | |
75 | |||
76 | __m256 input1; | ||
77 | |||
78 | 2 | const uint64_t eighthPoints = num_points / 8; | |
79 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < eighthPoints; number++) { |
80 | |||
81 | 32766 | input1 = _mm256_load_ps(inputPtr); | |
82 | |||
83 | 32766 | input1 = _mm256_mul_ps(input1, vecScalar); | |
84 | |||
85 | _mm256_store_ps(inputPtr, input1); | ||
86 | |||
87 | 32766 | inputPtr += 8; | |
88 | } | ||
89 | |||
90 | 2 | number = eighthPoints * 8; | |
91 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
92 | 14 | *inputPtr *= invScalar; | |
93 | 14 | inputPtr++; | |
94 | } | ||
95 | 2 | } | |
96 | #endif /* LV_HAVE_AVX */ | ||
97 | |||
98 | #ifdef LV_HAVE_SSE | ||
99 | #include <xmmintrin.h> | ||
100 | |||
101 | 2 | static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer, | |
102 | const float scalar, | ||
103 | unsigned int num_points) | ||
104 | { | ||
105 | 2 | unsigned int number = 0; | |
106 | 2 | float* inputPtr = vecBuffer; | |
107 | |||
108 | 2 | const float invScalar = 1.0 / scalar; | |
109 | 2 | __m128 vecScalar = _mm_set_ps1(invScalar); | |
110 | |||
111 | __m128 input1; | ||
112 | |||
113 | 2 | const uint64_t quarterPoints = num_points / 4; | |
114 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
115 | |||
116 | 65534 | input1 = _mm_load_ps(inputPtr); | |
117 | |||
118 | 65534 | input1 = _mm_mul_ps(input1, vecScalar); | |
119 | |||
120 | _mm_store_ps(inputPtr, input1); | ||
121 | |||
122 | 65534 | inputPtr += 4; | |
123 | } | ||
124 | |||
125 | 2 | number = quarterPoints * 4; | |
126 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
127 | 6 | *inputPtr *= invScalar; | |
128 | 6 | inputPtr++; | |
129 | } | ||
130 | 2 | } | |
131 | #endif /* LV_HAVE_SSE */ | ||
132 | |||
133 | #ifdef LV_HAVE_GENERIC | ||
134 | |||
135 | 2 | static inline void volk_32f_s32f_normalize_generic(float* vecBuffer, | |
136 | const float scalar, | ||
137 | unsigned int num_points) | ||
138 | { | ||
139 | 2 | unsigned int number = 0; | |
140 | 2 | float* inputPtr = vecBuffer; | |
141 | 2 | const float invScalar = 1.0 / scalar; | |
142 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
143 | 262142 | *inputPtr *= invScalar; | |
144 | 262142 | inputPtr++; | |
145 | } | ||
146 | 2 | } | |
147 | #endif /* LV_HAVE_GENERIC */ | ||
148 | |||
149 | #ifdef LV_HAVE_ORC | ||
150 | |||
151 | extern void volk_32f_s32f_normalize_a_orc_impl(float* dst, | ||
152 | float* src, | ||
153 | const float scalar, | ||
154 | unsigned int num_points); | ||
155 | 2 | static inline void volk_32f_s32f_normalize_u_orc(float* vecBuffer, | |
156 | const float scalar, | ||
157 | unsigned int num_points) | ||
158 | { | ||
159 | 2 | float invscalar = 1.0 / scalar; | |
160 | 2 | volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, num_points); | |
161 | 2 | } | |
162 | #endif /* LV_HAVE_ORC */ | ||
163 | |||
164 | #endif /* INCLUDED_volk_32f_s32f_normalize_a_H */ | ||
165 | |||
166 | #ifndef INCLUDED_volk_32f_s32f_normalize_u_H | ||
167 | #define INCLUDED_volk_32f_s32f_normalize_u_H | ||
168 | |||
169 | #include <inttypes.h> | ||
170 | #include <stdio.h> | ||
171 | #ifdef LV_HAVE_AVX | ||
172 | #include <immintrin.h> | ||
173 | |||
174 | 2 | static inline void volk_32f_s32f_normalize_u_avx(float* vecBuffer, | |
175 | const float scalar, | ||
176 | unsigned int num_points) | ||
177 | { | ||
178 | 2 | unsigned int number = 0; | |
179 | 2 | float* inputPtr = vecBuffer; | |
180 | |||
181 | 2 | const float invScalar = 1.0 / scalar; | |
182 | 2 | __m256 vecScalar = _mm256_set1_ps(invScalar); | |
183 | |||
184 | __m256 input1; | ||
185 | |||
186 | 2 | const uint64_t eighthPoints = num_points / 8; | |
187 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < eighthPoints; number++) { |
188 | |||
189 | 32766 | input1 = _mm256_loadu_ps(inputPtr); | |
190 | |||
191 | 32766 | input1 = _mm256_mul_ps(input1, vecScalar); | |
192 | |||
193 | _mm256_storeu_ps(inputPtr, input1); | ||
194 | |||
195 | 32766 | inputPtr += 8; | |
196 | } | ||
197 | |||
198 | 2 | number = eighthPoints * 8; | |
199 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
200 | 14 | *inputPtr *= invScalar; | |
201 | 14 | inputPtr++; | |
202 | } | ||
203 | 2 | } | |
204 | #endif /* LV_HAVE_AVX */ | ||
205 | |||
206 | |||
207 | #endif /* INCLUDED_volk_32f_s32f_normalize_u_H */ | ||
208 |