GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_s32f_power_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	21	21	100.0%
Functions:	3	3	100.0%
Branches:	6	6	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_s32f_power_32f
    
       *
    
       * \b Overview
    
       *
    
       * Takes each input vector value to the specified power and stores the
    
       * results in the return vector.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_s32f_power_32f(float* cVector, const float* aVector, const float power,
    
       * unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: The input vector of floats.
    
       * \li power: The power to raise the input value to.
    
       * \li num_points: The number of data points.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector.
    
       *
    
       * \b Example
    
       * Square the numbers (0,9)
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = (float)ii;
    
       *   }
    
       *
    
       *   // Exponent applied to every element (2.0 squares each value)
    
       *   float scale = 2.0f;
    
       *
    
       *   volk_32f_s32f_power_32f(out, increasing, scale, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %f\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
    
      #define INCLUDED_volk_32f_s32f_power_32f_a_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSE4_1
    
      #include <tmmintrin.h>
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
      #include <simdmath.h>
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
      2
      static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float power,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 vPower = _mm_set_ps1(power);
    
          __m128 zeroValue = _mm_setzero_ps();
    
          __m128 signMask;
    
          __m128 negatedValues;
    
          __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
    
          __m128 onesMask = _mm_set_ps1(1);
    
          __m128 aVal, cVal;
    
          for (; number < quarterPoints; number++) {
    
              aVal = _mm_load_ps(aPtr);
    
              signMask = _mm_cmplt_ps(aVal, zeroValue);
    
              negatedValues = _mm_sub_ps(zeroValue, aVal);
    
              aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
    
              // powf4 doesn't support negative values in the base, so we mask them off and then
    
              // apply the negative after
    
              cVal = powf4(aVal, vPower); // Takes each input value to the specified power
    
              cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
    
              _mm_store_ps(cPtr, cVal); // Store the results back into the C container
    
              aPtr += 4;
    
              cPtr += 4;
    
          }
    
          number = quarterPoints * 4;
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (; number < num_points; number++) {
    
      262142
              *cPtr++ = powf((*aPtr++), power);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE4_1 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
      #include <simdmath.h>
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
      2
      static inline void volk_32f_s32f_power_32f_a_sse(float* cVector,
    
                                                       const float* aVector,
    
                                                       const float power,
    
                                                       unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 vPower = _mm_set_ps1(power);
    
          __m128 zeroValue = _mm_setzero_ps();
    
          __m128 signMask;
    
          __m128 negatedValues;
    
          __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
    
          __m128 onesMask = _mm_set_ps1(1);
    
          __m128 aVal, cVal;
    
          for (; number < quarterPoints; number++) {
    
              aVal = _mm_load_ps(aPtr);
    
              signMask = _mm_cmplt_ps(aVal, zeroValue);
    
              negatedValues = _mm_sub_ps(zeroValue, aVal);
    
              aVal =
    
                  _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues));
    
              // powf4 doesn't support negative values in the base, so we mask them off and then
    
              // apply the negative after
    
              cVal = powf4(aVal, vPower); // Takes each input value to the specified power
    
              cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask),
    
                                          _mm_and_ps(signMask, negativeOneToPower)),
    
                                cVal);
    
              _mm_store_ps(cPtr, cVal); // Store the results back into the C container
    
              aPtr += 4;
    
              cPtr += 4;
    
          }
    
          number = quarterPoints * 4;
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (; number < num_points; number++) {
    
      262142
              *cPtr++ = powf((*aPtr++), power);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_s32f_power_32f_generic(float* cVector,
    
                                                         const float* aVector,
    
                                                         const float power,
    
                                                         unsigned int num_points)
    
      {
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *cPtr++ = powf((*aPtr++), power);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_s32f_power_32f
12			*
13			* \b Overview
14			*
15			* Takes each input vector value to the specified power and stores the
16			* results in the return vector.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32f_s32f_power_32f(float* cVector, const float* aVector, const float power,
21			* unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li aVector: The input vector of floats.
25			* \li power: The power to raise the input value to.
26			* \li num_points: The number of data points.
27			*
28			* \b Outputs
29			* \li cVector: The output vector.
30			*
31			* \b Example
32			* Square the numbers (0,9)
33			* \code
34			* int N = 10;
35			* unsigned int alignment = volk_get_alignment();
36			* float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
37			* float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
38			*
39			*
40			* for(unsigned int ii = 0; ii < N; ++ii){
41			* increasing[ii] = (float)ii;
42			* }
43			*
44			* // Exponent applied to every element (2.0 squares each value)
45			* float scale = 2.0f;
46			*
47			* volk_32f_s32f_power_32f(out, increasing, scale, N);
48			*
49			* for(unsigned int ii = 0; ii < N; ++ii){
50			* printf("out[%u] = %f\n", ii, out[ii]);
51			* }
52			*
53			* volk_free(increasing);
54			* volk_free(out);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
59			#define INCLUDED_volk_32f_s32f_power_32f_a_H
60
61			#include <inttypes.h>
62			#include <math.h>
63			#include <stdio.h>
64
65			#ifdef LV_HAVE_SSE4_1
66			#include <tmmintrin.h>
67
68			#ifdef LV_HAVE_LIB_SIMDMATH
69			#include <simdmath.h>
70			#endif /* LV_HAVE_LIB_SIMDMATH */
71
72		2	static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector,
73			const float* aVector,
74			const float power,
75			unsigned int num_points)
76			{
77		2	unsigned int number = 0;
78
79		2	float* cPtr = cVector;
80		2	const float* aPtr = aVector;
81
82			#ifdef LV_HAVE_LIB_SIMDMATH
83			const unsigned int quarterPoints = num_points / 4;
84			__m128 vPower = _mm_set_ps1(power);
85			__m128 zeroValue = _mm_setzero_ps();
86			__m128 signMask;
87			__m128 negatedValues;
88			__m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
89			__m128 onesMask = _mm_set_ps1(1);
90
91			__m128 aVal, cVal;
92			for (; number < quarterPoints; number++) {
93
94			aVal = _mm_load_ps(aPtr);
95			signMask = _mm_cmplt_ps(aVal, zeroValue);
96			negatedValues = _mm_sub_ps(zeroValue, aVal);
97			aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
98
99			// powf4 doesn't support negative values in the base, so we mask them off and then
100			// apply the negative after
101			cVal = powf4(aVal, vPower); // Takes each input value to the specified power
102
103			cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
104
105			_mm_store_ps(cPtr, cVal); // Store the results back into the C container
106
107			aPtr += 4;
108			cPtr += 4;
109			}
110
111			number = quarterPoints * 4;
112			#endif /* LV_HAVE_LIB_SIMDMATH */
113
114	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (; number < num_points; number++) {
115		262142	*cPtr++ = powf((*aPtr++), power);
116			}
117		2	}
118
119			#endif /* LV_HAVE_SSE4_1 */
120
121
122			#ifdef LV_HAVE_SSE
123			#include <xmmintrin.h>
124
125			#ifdef LV_HAVE_LIB_SIMDMATH
126			#include <simdmath.h>
127			#endif /* LV_HAVE_LIB_SIMDMATH */
128
129		2	static inline void volk_32f_s32f_power_32f_a_sse(float* cVector,
130			const float* aVector,
131			const float power,
132			unsigned int num_points)
133			{
134		2	unsigned int number = 0;
135
136		2	float* cPtr = cVector;
137		2	const float* aPtr = aVector;
138
139			#ifdef LV_HAVE_LIB_SIMDMATH
140			const unsigned int quarterPoints = num_points / 4;
141			__m128 vPower = _mm_set_ps1(power);
142			__m128 zeroValue = _mm_setzero_ps();
143			__m128 signMask;
144			__m128 negatedValues;
145			__m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
146			__m128 onesMask = _mm_set_ps1(1);
147
148			__m128 aVal, cVal;
149			for (; number < quarterPoints; number++) {
150
151			aVal = _mm_load_ps(aPtr);
152			signMask = _mm_cmplt_ps(aVal, zeroValue);
153			negatedValues = _mm_sub_ps(zeroValue, aVal);
154			aVal =
155			_mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues));
156
157			// powf4 doesn't support negative values in the base, so we mask them off and then
158			// apply the negative after
159			cVal = powf4(aVal, vPower); // Takes each input value to the specified power
160
161			cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask),
162			_mm_and_ps(signMask, negativeOneToPower)),
163			cVal);
164
165			_mm_store_ps(cPtr, cVal); // Store the results back into the C container
166
167			aPtr += 4;
168			cPtr += 4;
169			}
170
171			number = quarterPoints * 4;
172			#endif /* LV_HAVE_LIB_SIMDMATH */
173
174	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (; number < num_points; number++) {
175		262142	*cPtr++ = powf((*aPtr++), power);
176			}
177		2	}
178
179			#endif /* LV_HAVE_SSE */
180
181
182			#ifdef LV_HAVE_GENERIC
183
184		2	static inline void volk_32f_s32f_power_32f_generic(float* cVector,
185			const float* aVector,
186			const float power,
187			unsigned int num_points)
188			{
189		2	float* cPtr = cVector;
190		2	const float* aPtr = aVector;
191		2	unsigned int number = 0;
192
193	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
194		262142	*cPtr++ = powf((*aPtr++), power);
195			}
196		2	}
197			#endif /* LV_HAVE_GENERIC */
198
199
200			#endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */
201