GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_64f_x2_multiply_64f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 76 76 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2018 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_64f_x2_multiply_64f
12 *
13 * \b Overview
14 *
15 * Multiplies two input double-precision floating point vectors together.
16 *
17 * c[i] = a[i] * b[i]
18 *
19 * <b>Dispatcher Prototype</b>
20 * \code
21 * void volk_64f_x2_multiply_64f(double* cVector, const double* aVector, const double*
22 * bVector, unsigned int num_points) \endcode
23 *
24 * \b Inputs
25 * \li aVector: First input vector.
26 * \li bVector: Second input vector.
27 * \li num_points: The number of values in both input vectors.
28 *
29 * \b Outputs
30 * \li cVector: The output vector.
31 *
32 * \b Example
33 * Multiply elements of an increasing vector by those of a decreasing vector.
34 * \code
35 * int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment);
38 * double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
39 * double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
40 *
41 * for(unsigned int ii = 0; ii < N; ++ii){
42 * increasing[ii] = (double)ii;
43 * decreasing[ii] = 10.0 - (double)ii;
44 * }
45 *
46 * volk_64f_x2_multiply_64f(out, increasing, decreasing, N);
47 *
48 * for(unsigned int ii = 0; ii < N; ++ii){
49 * printf("out[%u] = %1.2F\n", ii, out[ii]);
50 * }
51 *
52 * volk_free(increasing);
53 * volk_free(decreasing);
54 * volk_free(out);
55 * \endcode
56 */
57
58 #ifndef INCLUDED_volk_64f_x2_multiply_64f_H
59 #define INCLUDED_volk_64f_x2_multiply_64f_H
60
61 #include <inttypes.h>
62
63
64 #ifdef LV_HAVE_GENERIC
65
66 2 static inline void volk_64f_x2_multiply_64f_generic(double* cVector,
67 const double* aVector,
68 const double* bVector,
69 unsigned int num_points)
70 {
71 2 double* cPtr = cVector;
72 2 const double* aPtr = aVector;
73 2 const double* bPtr = bVector;
74 2 unsigned int number = 0;
75
76
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
77 262142 *cPtr++ = (*aPtr++) * (*bPtr++);
78 }
79 2 }
80
81 #endif /* LV_HAVE_GENERIC */
82
83 /*
84 * Unaligned versions
85 */
86
87 #ifdef LV_HAVE_SSE2
88
89 #include <emmintrin.h>
90
91 2 static inline void volk_64f_x2_multiply_64f_u_sse2(double* cVector,
92 const double* aVector,
93 const double* bVector,
94 unsigned int num_points)
95 {
96 2 unsigned int number = 0;
97 2 const unsigned int half_points = num_points / 2;
98
99 2 double* cPtr = cVector;
100 2 const double* aPtr = aVector;
101 2 const double* bPtr = bVector;
102
103 __m128d aVal, bVal, cVal;
104
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < half_points; number++) {
105 131070 aVal = _mm_loadu_pd(aPtr);
106 131070 bVal = _mm_loadu_pd(bPtr);
107
108 131070 cVal = _mm_mul_pd(aVal, bVal);
109
110 _mm_storeu_pd(cPtr, cVal); // Store the results back into the C container
111
112 131070 aPtr += 2;
113 131070 bPtr += 2;
114 131070 cPtr += 2;
115 }
116
117 2 number = half_points * 2;
118
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (; number < num_points; number++) {
119 2 *cPtr++ = (*aPtr++) * (*bPtr++);
120 }
121 2 }
122
123 #endif /* LV_HAVE_SSE2 */
124
125
126 #ifdef LV_HAVE_AVX
127
128 #include <immintrin.h>
129
130 2 static inline void volk_64f_x2_multiply_64f_u_avx(double* cVector,
131 const double* aVector,
132 const double* bVector,
133 unsigned int num_points)
134 {
135 2 unsigned int number = 0;
136 2 const unsigned int quarter_points = num_points / 4;
137
138 2 double* cPtr = cVector;
139 2 const double* aPtr = aVector;
140 2 const double* bPtr = bVector;
141
142 __m256d aVal, bVal, cVal;
143
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarter_points; number++) {
144
145 65534 aVal = _mm256_loadu_pd(aPtr);
146 65534 bVal = _mm256_loadu_pd(bPtr);
147
148 65534 cVal = _mm256_mul_pd(aVal, bVal);
149
150 _mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
151
152 65534 aPtr += 4;
153 65534 bPtr += 4;
154 65534 cPtr += 4;
155 }
156
157 2 number = quarter_points * 4;
158
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
159 6 *cPtr++ = (*aPtr++) * (*bPtr++);
160 }
161 2 }
162
163 #endif /* LV_HAVE_AVX */
164
165 /*
166 * Aligned versions
167 */
168
169 #ifdef LV_HAVE_SSE2
170
171 #include <emmintrin.h>
172
173 2 static inline void volk_64f_x2_multiply_64f_a_sse2(double* cVector,
174 const double* aVector,
175 const double* bVector,
176 unsigned int num_points)
177 {
178 2 unsigned int number = 0;
179 2 const unsigned int half_points = num_points / 2;
180
181 2 double* cPtr = cVector;
182 2 const double* aPtr = aVector;
183 2 const double* bPtr = bVector;
184
185 __m128d aVal, bVal, cVal;
186
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < half_points; number++) {
187 131070 aVal = _mm_load_pd(aPtr);
188 131070 bVal = _mm_load_pd(bPtr);
189
190 131070 cVal = _mm_mul_pd(aVal, bVal);
191
192 _mm_store_pd(cPtr, cVal); // Store the results back into the C container
193
194 131070 aPtr += 2;
195 131070 bPtr += 2;
196 131070 cPtr += 2;
197 }
198
199 2 number = half_points * 2;
200
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (; number < num_points; number++) {
201 2 *cPtr++ = (*aPtr++) * (*bPtr++);
202 }
203 2 }
204
205 #endif /* LV_HAVE_SSE2 */
206
207
208 #ifdef LV_HAVE_AVX
209
210 #include <immintrin.h>
211
212 2 static inline void volk_64f_x2_multiply_64f_a_avx(double* cVector,
213 const double* aVector,
214 const double* bVector,
215 unsigned int num_points)
216 {
217 2 unsigned int number = 0;
218 2 const unsigned int quarter_points = num_points / 4;
219
220 2 double* cPtr = cVector;
221 2 const double* aPtr = aVector;
222 2 const double* bPtr = bVector;
223
224 __m256d aVal, bVal, cVal;
225
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarter_points; number++) {
226
227 65534 aVal = _mm256_load_pd(aPtr);
228 65534 bVal = _mm256_load_pd(bPtr);
229
230 65534 cVal = _mm256_mul_pd(aVal, bVal);
231
232 _mm256_store_pd(cPtr, cVal); // Store the results back into the C container
233
234 65534 aPtr += 4;
235 65534 bPtr += 4;
236 65534 cPtr += 4;
237 }
238
239 2 number = quarter_points * 4;
240
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
241 6 *cPtr++ = (*aPtr++) * (*bPtr++);
242 }
243 2 }
244
245 #endif /* LV_HAVE_AVX */
246
247 #endif /* INCLUDED_volk_64f_x2_multiply_64f_H */
248