GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_x2_subtract_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	62	96	64.6%
Functions:	5	7	71.4%
Branches:	14	22	63.6%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_x2_subtract_32f
    
       *
    
       * \b Overview
    
       *
    
       * Subtracts values in bVector from values in aVector.
    
       *
    
       * c[i] = a[i] - b[i]
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_x2_subtract_32f(float* cVector, const float* aVector, const float*
    
       * bVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: The initial vector.
    
       * \li bVector: The vector to be subtracted.
    
       * \li num_points: The number of values in both input vectors.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector.
    
       *
    
       * \b Example
    
       * Subtract a decreasing vector from an increasing vector.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* decreasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = (float)ii;
    
       *       decreasing[ii] = 10.f - (float)ii;
    
       *   }
    
       *
    
       *   volk_32f_x2_subtract_32f(out, increasing, decreasing, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %1.2f\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(decreasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H
    
      #define INCLUDED_volk_32f_x2_subtract_32f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32f_x2_subtract_32f_a_avx512f(float* cVector,
    
                                                            const float* aVector,
    
                                                            const float* bVector,
    
                                                            unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int sixteenthPoints = num_points / 16;
    
      ✗
          float* cPtr = cVector;
    
      ✗
          const float* aPtr = aVector;
    
      ✗
          const float* bPtr = bVector;
    
          __m512 aVal, bVal, cVal;
    
      ✗
          for (; number < sixteenthPoints; number++) {
    
      ✗
              aVal = _mm512_load_ps(aPtr);
    
      ✗
              bVal = _mm512_load_ps(bPtr);
    
      ✗
              cVal = _mm512_sub_ps(aVal, bVal);
    
              _mm512_store_ps(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 16;
    
      ✗
              bPtr += 16;
    
      ✗
              cPtr += 16;
    
          }
    
      ✗
          number = sixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              *cPtr++ = (*aPtr++) - (*bPtr++);
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_subtract_32f_a_avx(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
          __m256 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              aVal = _mm256_load_ps(aPtr);
    
      32766
              bVal = _mm256_load_ps(bPtr);
    
      32766
              cVal = _mm256_sub_ps(aVal, bVal);
    
              _mm256_store_ps(cPtr, cVal); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *cPtr++ = (*aPtr++) - (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
          __m128 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_load_ps(aPtr);
    
      65534
              bVal = _mm_load_ps(bPtr);
    
      65534
              cVal = _mm_sub_ps(aVal, bVal);
    
              _mm_store_ps(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *cPtr++ = (*aPtr++) - (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_x2_subtract_32f_generic(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float* bVector,
    
                                                          unsigned int num_points)
    
      {
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *cPtr++ = (*aPtr++) - (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32f_x2_subtract_32f_neon(float* cVector,
    
                                                       const float* aVector,
    
                                                       const float* bVector,
    
                                                       unsigned int num_points)
    
      {
    
          float* cPtr = cVector;
    
          const float* aPtr = aVector;
    
          const float* bPtr = bVector;
    
          unsigned int number = 0;
    
          unsigned int quarter_points = num_points / 4;
    
          float32x4_t a_vec, b_vec, c_vec;
    
          for (number = 0; number < quarter_points; number++) {
    
              a_vec = vld1q_f32(aPtr);
    
              b_vec = vld1q_f32(bPtr);
    
              c_vec = vsubq_f32(a_vec, b_vec);
    
              vst1q_f32(cPtr, c_vec);
    
              aPtr += 4;
    
              bPtr += 4;
    
              cPtr += 4;
    
          }
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              *cPtr++ = (*aPtr++) - (*bPtr++);
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_ORC
    
      extern void volk_32f_x2_subtract_32f_a_orc_impl(float* cVector,
    
                                                      const float* aVector,
    
                                                      const float* bVector,
    
                                                      unsigned int num_points);
    
      2
      static inline void volk_32f_x2_subtract_32f_u_orc(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points);
    
      2
      }
    
      #endif /* LV_HAVE_ORC */
    
      #endif /* INCLUDED_volk_32f_x2_subtract_32f_a_H */
    
      #ifndef INCLUDED_volk_32f_x2_subtract_32f_u_H
    
      #define INCLUDED_volk_32f_x2_subtract_32f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32f_x2_subtract_32f_u_avx512f(float* cVector,
    
                                                            const float* aVector,
    
                                                            const float* bVector,
    
                                                            unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int sixteenthPoints = num_points / 16;
    
      ✗
          float* cPtr = cVector;
    
      ✗
          const float* aPtr = aVector;
    
      ✗
          const float* bPtr = bVector;
    
          __m512 aVal, bVal, cVal;
    
      ✗
          for (; number < sixteenthPoints; number++) {
    
      ✗
              aVal = _mm512_loadu_ps(aPtr);
    
      ✗
              bVal = _mm512_loadu_ps(bPtr);
    
      ✗
              cVal = _mm512_sub_ps(aVal, bVal);
    
              _mm512_storeu_ps(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 16;
    
      ✗
              bPtr += 16;
    
      ✗
              cPtr += 16;
    
          }
    
      ✗
          number = sixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              *cPtr++ = (*aPtr++) - (*bPtr++);
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_subtract_32f_u_avx(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
          __m256 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              aVal = _mm256_loadu_ps(aPtr);
    
      32766
              bVal = _mm256_loadu_ps(bPtr);
    
      32766
              cVal = _mm256_sub_ps(aVal, bVal);
    
              _mm256_storeu_ps(cPtr, cVal); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *cPtr++ = (*aPtr++) - (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_32f_x2_subtract_32f_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_x2_subtract_32f
12			*
13			* \b Overview
14			*
15			* Subtracts values in bVector from values in aVector.
16			*
17			* c[i] = a[i] - b[i]
18			*
19			* <b>Dispatcher Prototype</b>
20			* \code
21			* void volk_32f_x2_subtract_32f(float* cVector, const float* aVector, const float*
22			* bVector, unsigned int num_points) \endcode
23			*
24			* \b Inputs
25			* \li aVector: The initial vector.
26			* \li bVector: The vector to be subtracted.
27			* \li num_points: The number of values in both input vectors.
28			*
29			* \b Outputs
30			* \li cVector: The output vector.
31			*
32			* \b Example
33			* Subtract a decreasing vector from an increasing vector.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
38			* float* decreasing = (float*)volk_malloc(sizeof(float)*N, alignment);
39			* float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
40			*
41			* for(unsigned int ii = 0; ii < N; ++ii){
42			* increasing[ii] = (float)ii;
43			* decreasing[ii] = 10.f - (float)ii;
44			* }
45			*
46			* volk_32f_x2_subtract_32f(out, increasing, decreasing, N);
47			*
48			* for(unsigned int ii = 0; ii < N; ++ii){
49			* printf("out[%u] = %1.2f\n", ii, out[ii]);
50			* }
51			*
52			* volk_free(increasing);
53			* volk_free(decreasing);
54			* volk_free(out);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H
59			#define INCLUDED_volk_32f_x2_subtract_32f_a_H
60
61			#include <inttypes.h>
62			#include <stdio.h>
63
64			#ifdef LV_HAVE_AVX512F
65			#include <immintrin.h>
66
67		✗	static inline void volk_32f_x2_subtract_32f_a_avx512f(float* cVector,
68			const float* aVector,
69			const float* bVector,
70			unsigned int num_points)
71			{
72		✗	unsigned int number = 0;
73		✗	const unsigned int sixteenthPoints = num_points / 16;
74
75		✗	float* cPtr = cVector;
76		✗	const float* aPtr = aVector;
77		✗	const float* bPtr = bVector;
78
79			__m512 aVal, bVal, cVal;
80		✗	for (; number < sixteenthPoints; number++) {
81
82		✗	aVal = _mm512_load_ps(aPtr);
83		✗	bVal = _mm512_load_ps(bPtr);
84
85		✗	cVal = _mm512_sub_ps(aVal, bVal);
86
87			_mm512_store_ps(cPtr, cVal); // Store the results back into the C container
88
89		✗	aPtr += 16;
90		✗	bPtr += 16;
91		✗	cPtr += 16;
92			}
93
94		✗	number = sixteenthPoints * 16;
95		✗	for (; number < num_points; number++) {
96		✗	*cPtr++ = (*aPtr++) - (*bPtr++);
97			}
98		✗	}
99			#endif /* LV_HAVE_AVX512F */
100
101			#ifdef LV_HAVE_AVX
102			#include <immintrin.h>
103
104		2	static inline void volk_32f_x2_subtract_32f_a_avx(float* cVector,
105			const float* aVector,
106			const float* bVector,
107			unsigned int num_points)
108			{
109		2	unsigned int number = 0;
110		2	const unsigned int eighthPoints = num_points / 8;
111
112		2	float* cPtr = cVector;
113		2	const float* aPtr = aVector;
114		2	const float* bPtr = bVector;
115
116			__m256 aVal, bVal, cVal;
117	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
118
119		32766	aVal = _mm256_load_ps(aPtr);
120		32766	bVal = _mm256_load_ps(bPtr);
121
122		32766	cVal = _mm256_sub_ps(aVal, bVal);
123
124			_mm256_store_ps(cPtr, cVal); // Store the results back into the C container
125
126		32766	aPtr += 8;
127		32766	bPtr += 8;
128		32766	cPtr += 8;
129			}
130
131		2	number = eighthPoints * 8;
132	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
133		14	*cPtr++ = (*aPtr++) - (*bPtr++);
134			}
135		2	}
136			#endif /* LV_HAVE_AVX */
137
138			#ifdef LV_HAVE_SSE
139			#include <xmmintrin.h>
140
141		2	static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector,
142			const float* aVector,
143			const float* bVector,
144			unsigned int num_points)
145			{
146		2	unsigned int number = 0;
147		2	const unsigned int quarterPoints = num_points / 4;
148
149		2	float* cPtr = cVector;
150		2	const float* aPtr = aVector;
151		2	const float* bPtr = bVector;
152
153			__m128 aVal, bVal, cVal;
154	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
155
156		65534	aVal = _mm_load_ps(aPtr);
157		65534	bVal = _mm_load_ps(bPtr);
158
159		65534	cVal = _mm_sub_ps(aVal, bVal);
160
161			_mm_store_ps(cPtr, cVal); // Store the results back into the C container
162
163		65534	aPtr += 4;
164		65534	bPtr += 4;
165		65534	cPtr += 4;
166			}
167
168		2	number = quarterPoints * 4;
169	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
170		6	*cPtr++ = (*aPtr++) - (*bPtr++);
171			}
172		2	}
173			#endif /* LV_HAVE_SSE */
174
175
176			#ifdef LV_HAVE_GENERIC
177
178		2	static inline void volk_32f_x2_subtract_32f_generic(float* cVector,
179			const float* aVector,
180			const float* bVector,
181			unsigned int num_points)
182			{
183		2	float* cPtr = cVector;
184		2	const float* aPtr = aVector;
185		2	const float* bPtr = bVector;
186		2	unsigned int number = 0;
187
188	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
189		262142	*cPtr++ = (*aPtr++) - (*bPtr++);
190			}
191		2	}
192			#endif /* LV_HAVE_GENERIC */
193
194
195			#ifdef LV_HAVE_NEON
196			#include <arm_neon.h>
197
198			static inline void volk_32f_x2_subtract_32f_neon(float* cVector,
199			const float* aVector,
200			const float* bVector,
201			unsigned int num_points)
202			{
203			float* cPtr = cVector;
204			const float* aPtr = aVector;
205			const float* bPtr = bVector;
206			unsigned int number = 0;
207			unsigned int quarter_points = num_points / 4;
208
209			float32x4_t a_vec, b_vec, c_vec;
210
211			for (number = 0; number < quarter_points; number++) {
212			a_vec = vld1q_f32(aPtr);
213			b_vec = vld1q_f32(bPtr);
214			c_vec = vsubq_f32(a_vec, b_vec);
215			vst1q_f32(cPtr, c_vec);
216			aPtr += 4;
217			bPtr += 4;
218			cPtr += 4;
219			}
220
221			for (number = quarter_points * 4; number < num_points; number++) {
222			*cPtr++ = (*aPtr++) - (*bPtr++);
223			}
224			}
225			#endif /* LV_HAVE_NEON */
226
227
228			#ifdef LV_HAVE_ORC
229			extern void volk_32f_x2_subtract_32f_a_orc_impl(float* cVector,
230			const float* aVector,
231			const float* bVector,
232			unsigned int num_points);
233
234		2	static inline void volk_32f_x2_subtract_32f_u_orc(float* cVector,
235			const float* aVector,
236			const float* bVector,
237			unsigned int num_points)
238			{
239		2	volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points);
240		2	}
241			#endif /* LV_HAVE_ORC */
242
243
244			#endif /* INCLUDED_volk_32f_x2_subtract_32f_a_H */
245
246
247			#ifndef INCLUDED_volk_32f_x2_subtract_32f_u_H
248			#define INCLUDED_volk_32f_x2_subtract_32f_u_H
249
250			#include <inttypes.h>
251			#include <stdio.h>
252
253			#ifdef LV_HAVE_AVX512F
254			#include <immintrin.h>
255
256		✗	static inline void volk_32f_x2_subtract_32f_u_avx512f(float* cVector,
257			const float* aVector,
258			const float* bVector,
259			unsigned int num_points)
260			{
261		✗	unsigned int number = 0;
262		✗	const unsigned int sixteenthPoints = num_points / 16;
263
264		✗	float* cPtr = cVector;
265		✗	const float* aPtr = aVector;
266		✗	const float* bPtr = bVector;
267
268			__m512 aVal, bVal, cVal;
269		✗	for (; number < sixteenthPoints; number++) {
270
271		✗	aVal = _mm512_loadu_ps(aPtr);
272		✗	bVal = _mm512_loadu_ps(bPtr);
273
274		✗	cVal = _mm512_sub_ps(aVal, bVal);
275
276			_mm512_storeu_ps(cPtr, cVal); // Store the results back into the C container
277
278		✗	aPtr += 16;
279		✗	bPtr += 16;
280		✗	cPtr += 16;
281			}
282
283		✗	number = sixteenthPoints * 16;
284		✗	for (; number < num_points; number++) {
285		✗	*cPtr++ = (*aPtr++) - (*bPtr++);
286			}
287		✗	}
288			#endif /* LV_HAVE_AVX512F */
289
290
291			#ifdef LV_HAVE_AVX
292			#include <immintrin.h>
293
294		2	static inline void volk_32f_x2_subtract_32f_u_avx(float* cVector,
295			const float* aVector,
296			const float* bVector,
297			unsigned int num_points)
298			{
299		2	unsigned int number = 0;
300		2	const unsigned int eighthPoints = num_points / 8;
301
302		2	float* cPtr = cVector;
303		2	const float* aPtr = aVector;
304		2	const float* bPtr = bVector;
305
306			__m256 aVal, bVal, cVal;
307	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
308
309		32766	aVal = _mm256_loadu_ps(aPtr);
310		32766	bVal = _mm256_loadu_ps(bPtr);
311
312		32766	cVal = _mm256_sub_ps(aVal, bVal);
313
314			_mm256_storeu_ps(cPtr, cVal); // Store the results back into the C container
315
316		32766	aPtr += 8;
317		32766	bPtr += 8;
318		32766	cPtr += 8;
319			}
320
321		2	number = eighthPoints * 8;
322	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
323		14	*cPtr++ = (*aPtr++) - (*bPtr++);
324			}
325		2	}
326			#endif /* LV_HAVE_AVX */
327
328			#endif /* INCLUDED_volk_32f_x2_subtract_32f_u_H */
329