| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | | | /* -*- c++ -*- */ |
| 2 | | | /* |
| 3 | | | * Copyright 2015-2020 Free Software Foundation, Inc. |
| 4 | | | * |
| 5 | | | * This file is part of VOLK |
| 6 | | | * |
| 7 | | | * SPDX-License-Identifier: LGPL-3.0-or-later |
| 8 | | | */ |
| 9 | | | |
| 10 | | | /* SIMD (SSE2) implementation of exp |
| 11 | | | Inspired by Intel Approximate Math library, and based on the |
| 12 | | | corresponding algorithms of the cephes math library |
| 13 | | | */ |
| 14 | | | |
| 15 | | | /* Copyright (C) 2007 Julien Pommier |
| 16 | | | |
| 17 | | | This software is provided 'as-is', without any express or implied |
| 18 | | | warranty. In no event will the authors be held liable for any damages |
| 19 | | | arising from the use of this software. |
| 20 | | | |
| 21 | | | Permission is granted to anyone to use this software for any purpose, |
| 22 | | | including commercial applications, and to alter it and redistribute it |
| 23 | | | freely, subject to the following restrictions: |
| 24 | | | |
| 25 | | | 1. The origin of this software must not be misrepresented; you must not |
| 26 | | | claim that you wrote the original software. If you use this software |
| 27 | | | in a product, an acknowledgment in the product documentation would be |
| 28 | | | appreciated but is not required. |
| 29 | | | 2. Altered source versions must be plainly marked as such, and must not be |
| 30 | | | misrepresented as being the original software. |
| 31 | | | 3. This notice may not be removed or altered from any source distribution. |
| 32 | | | |
| 33 | | | (this is the zlib license) |
| 34 | | | */ |
| 35 | | | |
| 36 | | | /*! |
| 37 | | | * \page volk_32f_exp_32f |
| 38 | | | * |
| 39 | | | * \b Overview |
| 40 | | | * |
| 41 | | | * Computes e^x for each entry of the input vector and stores the results in the output vector. |
| 42 | | | * |
| 43 | | | * <b>Dispatcher Prototype</b> |
| 44 | | | * \code |
| 45 | | | * void volk_32f_exp_32f(float* bVector, const float* aVector, unsigned int num_points) |
| 46 | | | * \endcode |
| 47 | | | * |
| 48 | | | * \b Inputs |
| 49 | | | * \li aVector: The input vector of floats. |
| 50 | | | * \li num_points: The number of data points. |
| 51 | | | * |
| 52 | | | * \b Outputs |
| 53 | | | * \li bVector: The vector where results will be stored. |
| 54 | | | * |
| 55 | | | * \b Example |
| 56 | | | * \code |
| 57 | | | * int N = 10; |
| 58 | | | * unsigned int alignment = volk_get_alignment(); |
| 59 | | | * float* in = (float*)volk_malloc(sizeof(float)*N, alignment); |
| 60 | | | * float* out = (float*)volk_malloc(sizeof(float)*N, alignment); |
| 61 | | | * |
| 62 | | | * in[0] = 0; |
| 63 | | | * in[1] = 0.5; |
| 64 | | | * in[2] = std::sqrt(2.f)/2.f; |
| 65 | | | * in[3] = std::sqrt(3.f)/2.f; |
| 66 | | | * in[4] = in[5] = 1; |
| 67 | | | * for(unsigned int ii = 6; ii < N; ++ii){ |
| 68 | | | * in[ii] = - in[N-ii-1]; |
| 69 | | | * } |
| 70 | | | * |
| 71 | | | * volk_32f_exp_32f(out, in, N); |
| 72 | | | * |
| 73 | | | * for(unsigned int ii = 0; ii < N; ++ii){ |
| 74 | | | * printf("exp(%1.3f) = %1.3f\n", in[ii], out[ii]); |
| 75 | | | * } |
| 76 | | | * |
| 77 | | | * volk_free(in); |
| 78 | | | * volk_free(out); |
| 79 | | | * \endcode |
| 80 | | | */ |
| 81 | | | |
| 82 | | | #include <inttypes.h> |
| 83 | | | #include <math.h> |
| 84 | | | #include <stdio.h> |
| 85 | | | |
| 86 | | | #ifndef INCLUDED_volk_32f_exp_32f_a_H |
| 87 | | | #define INCLUDED_volk_32f_exp_32f_a_H |
| 88 | | | |
| 89 | | | #ifdef LV_HAVE_SSE2 |
| 90 | | | #include <emmintrin.h> |
| 91 | | | |
| 92 | | | static inline void |
| 93 | | ✗ | volk_32f_exp_32f_a_sse2(float* bVector, const float* aVector, unsigned int num_points) |
| 94 | | | { |
| 95 | | ✗ | float* bPtr = bVector; |
| 96 | | ✗ | const float* aPtr = aVector; |
| 97 | | | |
| 98 | | ✗ | unsigned int number = 0; |
| 99 | | ✗ | unsigned int quarterPoints = num_points / 4; |
| 100 | | | |
| 101 | | | // Declare variables and constants |
| 102 | | | __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y; |
| 103 | | | __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2; |
| 104 | | | __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5; |
| 105 | | | __m128i emm0, pi32_0x7f; |
| 106 | | | |
| 107 | | ✗ | one = _mm_set1_ps(1.0); |
| 108 | | ✗ | exp_hi = _mm_set1_ps(88.3762626647949); |
| 109 | | ✗ | exp_lo = _mm_set1_ps(-88.3762626647949); |
| 110 | | ✗ | log2EF = _mm_set1_ps(1.44269504088896341); |
| 111 | | ✗ | half = _mm_set1_ps(0.5); |
| 112 | | ✗ | exp_C1 = _mm_set1_ps(0.693359375); |
| 113 | | ✗ | exp_C2 = _mm_set1_ps(-2.12194440e-4); |
| 114 | | ✗ | pi32_0x7f = _mm_set1_epi32(0x7f); |
| 115 | | | |
| 116 | | ✗ | exp_p0 = _mm_set1_ps(1.9875691500e-4); |
| 117 | | ✗ | exp_p1 = _mm_set1_ps(1.3981999507e-3); |
| 118 | | ✗ | exp_p2 = _mm_set1_ps(8.3334519073e-3); |
| 119 | | ✗ | exp_p3 = _mm_set1_ps(4.1665795894e-2); |
| 120 | | ✗ | exp_p4 = _mm_set1_ps(1.6666665459e-1); |
| 121 | | ✗ | exp_p5 = _mm_set1_ps(5.0000001201e-1); |
| 122 | | | |
| 123 | | ✗ | for (; number < quarterPoints; number++) { |
| 124 | | ✗ | aVal = _mm_load_ps(aPtr); |
| 125 | | ✗ | tmp = _mm_setzero_ps(); |
| 126 | | | |
| 127 | | ✗ | aVal = _mm_max_ps(_mm_min_ps(aVal, exp_hi), exp_lo); |
| 128 | | | |
| 129 | | | /* express exp(x) as exp(g + n*log(2)) */ |
| 130 | | ✗ | fx = _mm_add_ps(_mm_mul_ps(aVal, log2EF), half); |
| 131 | | | |
| 132 | | ✗ | emm0 = _mm_cvttps_epi32(fx); |
| 133 | | ✗ | tmp = _mm_cvtepi32_ps(emm0); |
| 134 | | | |
| 135 | | ✗ | mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one); |
| 136 | | ✗ | fx = _mm_sub_ps(tmp, mask); |
| 137 | | | |
| 138 | | ✗ | tmp = _mm_mul_ps(fx, exp_C1); |
| 139 | | ✗ | z = _mm_mul_ps(fx, exp_C2); |
| 140 | | ✗ | aVal = _mm_sub_ps(_mm_sub_ps(aVal, tmp), z); |
| 141 | | ✗ | z = _mm_mul_ps(aVal, aVal); |
| 142 | | | |
| 143 | | ✗ | y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, aVal), exp_p1), aVal); |
| 144 | | ✗ | y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), aVal), exp_p3); |
| 145 | | ✗ | y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, aVal), exp_p4), aVal); |
| 146 | | ✗ | y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), aVal); |
| 147 | | ✗ | y = _mm_add_ps(y, one); |
| 148 | | | |
| 149 | | ✗ | emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 23); |
| 150 | | | |
| 151 | | ✗ | pow2n = _mm_castsi128_ps(emm0); |
| 152 | | ✗ | bVal = _mm_mul_ps(y, pow2n); |
| 153 | | | |
| 154 | | | _mm_store_ps(bPtr, bVal); |
| 155 | | ✗ | aPtr += 4; |
| 156 | | ✗ | bPtr += 4; |
| 157 | | | } |
| 158 | | | |
| 159 | | ✗ | number = quarterPoints * 4; |
| 160 | | ✗ | for (; number < num_points; number++) { |
| 161 | | ✗ | *bPtr++ = expf(*aPtr++); |
| 162 | | | } |
| 163 | | ✗ | } |
| 164 | | | |
| 165 | | | #endif /* LV_HAVE_SSE2 for aligned */ |
| 166 | | | |
| 167 | | | |
| 168 | | | #ifdef LV_HAVE_GENERIC |
| 169 | | | |
| 170 | | | static inline void |
| 171 | | ✗ | volk_32f_exp_32f_a_generic(float* bVector, const float* aVector, unsigned int num_points) |
| 172 | | | { |
| 173 | | ✗ | float* bPtr = bVector; |
| 174 | | ✗ | const float* aPtr = aVector; |
| 175 | | ✗ | unsigned int number = 0; |
| 176 | | | |
| 177 | | ✗ | for (number = 0; number < num_points; number++) { |
| 178 | | ✗ | *bPtr++ = expf(*aPtr++); |
| 179 | | | } |
| 180 | | ✗ | } |
| 181 | | | |
| 182 | | | #endif /* LV_HAVE_GENERIC */ |
| 183 | | | |
| 184 | | | #endif /* INCLUDED_volk_32f_exp_32f_a_H */ |
| 185 | | | |
| 186 | | | #ifndef INCLUDED_volk_32f_exp_32f_u_H |
| 187 | | | #define INCLUDED_volk_32f_exp_32f_u_H |
| 188 | | | |
| 189 | | | #ifdef LV_HAVE_SSE2 |
| 190 | | | #include <emmintrin.h> |
| 191 | | | |
| 192 | | | static inline void |
| 193 | | ✗ | volk_32f_exp_32f_u_sse2(float* bVector, const float* aVector, unsigned int num_points) |
| 194 | | | { |
| 195 | | ✗ | float* bPtr = bVector; |
| 196 | | ✗ | const float* aPtr = aVector; |
| 197 | | | |
| 198 | | ✗ | unsigned int number = 0; |
| 199 | | ✗ | unsigned int quarterPoints = num_points / 4; |
| 200 | | | |
| 201 | | | // Declare variables and constants |
| 202 | | | __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y; |
| 203 | | | __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2; |
| 204 | | | __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5; |
| 205 | | | __m128i emm0, pi32_0x7f; |
| 206 | | | |
| 207 | | ✗ | one = _mm_set1_ps(1.0); |
| 208 | | ✗ | exp_hi = _mm_set1_ps(88.3762626647949); |
| 209 | | ✗ | exp_lo = _mm_set1_ps(-88.3762626647949); |
| 210 | | ✗ | log2EF = _mm_set1_ps(1.44269504088896341); |
| 211 | | ✗ | half = _mm_set1_ps(0.5); |
| 212 | | ✗ | exp_C1 = _mm_set1_ps(0.693359375); |
| 213 | | ✗ | exp_C2 = _mm_set1_ps(-2.12194440e-4); |
| 214 | | ✗ | pi32_0x7f = _mm_set1_epi32(0x7f); |
| 215 | | | |
| 216 | | ✗ | exp_p0 = _mm_set1_ps(1.9875691500e-4); |
| 217 | | ✗ | exp_p1 = _mm_set1_ps(1.3981999507e-3); |
| 218 | | ✗ | exp_p2 = _mm_set1_ps(8.3334519073e-3); |
| 219 | | ✗ | exp_p3 = _mm_set1_ps(4.1665795894e-2); |
| 220 | | ✗ | exp_p4 = _mm_set1_ps(1.6666665459e-1); |
| 221 | | ✗ | exp_p5 = _mm_set1_ps(5.0000001201e-1); |
| 222 | | | |
| 223 | | | |
| 224 | | ✗ | for (; number < quarterPoints; number++) { |
| 225 | | ✗ | aVal = _mm_loadu_ps(aPtr); |
| 226 | | ✗ | tmp = _mm_setzero_ps(); |
| 227 | | | |
| 228 | | ✗ | aVal = _mm_max_ps(_mm_min_ps(aVal, exp_hi), exp_lo); |
| 229 | | | |
| 230 | | | /* express exp(x) as exp(g + n*log(2)) */ |
| 231 | | ✗ | fx = _mm_add_ps(_mm_mul_ps(aVal, log2EF), half); |
| 232 | | | |
| 233 | | ✗ | emm0 = _mm_cvttps_epi32(fx); |
| 234 | | ✗ | tmp = _mm_cvtepi32_ps(emm0); |
| 235 | | | |
| 236 | | ✗ | mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one); |
| 237 | | ✗ | fx = _mm_sub_ps(tmp, mask); |
| 238 | | | |
| 239 | | ✗ | tmp = _mm_mul_ps(fx, exp_C1); |
| 240 | | ✗ | z = _mm_mul_ps(fx, exp_C2); |
| 241 | | ✗ | aVal = _mm_sub_ps(_mm_sub_ps(aVal, tmp), z); |
| 242 | | ✗ | z = _mm_mul_ps(aVal, aVal); |
| 243 | | | |
| 244 | | ✗ | y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, aVal), exp_p1), aVal); |
| 245 | | ✗ | y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), aVal), exp_p3); |
| 246 | | ✗ | y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, aVal), exp_p4), aVal); |
| 247 | | ✗ | y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), aVal); |
| 248 | | ✗ | y = _mm_add_ps(y, one); |
| 249 | | | |
| 250 | | ✗ | emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 23); |
| 251 | | | |
| 252 | | ✗ | pow2n = _mm_castsi128_ps(emm0); |
| 253 | | ✗ | bVal = _mm_mul_ps(y, pow2n); |
| 254 | | | |
| 255 | | | _mm_storeu_ps(bPtr, bVal); |
| 256 | | ✗ | aPtr += 4; |
| 257 | | ✗ | bPtr += 4; |
| 258 | | | } |
| 259 | | | |
| 260 | | ✗ | number = quarterPoints * 4; |
| 261 | | ✗ | for (; number < num_points; number++) { |
| 262 | | ✗ | *bPtr++ = expf(*aPtr++); |
| 263 | | | } |
| 264 | | ✗ | } |
| 265 | | | |
| 266 | | | #endif /* LV_HAVE_SSE2 for unaligned */ |
| 267 | | | |
| 268 | | | |
| 269 | | | #ifdef LV_HAVE_GENERIC |
| 270 | | | |
| 271 | | | static inline void |
| 272 | | ✗ | volk_32f_exp_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points) |
| 273 | | | { |
| 274 | | ✗ | float* bPtr = bVector; |
| 275 | | ✗ | const float* aPtr = aVector; |
| 276 | | ✗ | unsigned int number = 0; |
| 277 | | | |
| 278 | | ✗ | for (number = 0; number < num_points; number++) { |
| 279 | | ✗ | *bPtr++ = expf(*aPtr++); |
| 280 | | | } |
| 281 | | ✗ | } |
| 282 | | | |
| 283 | | | #endif /* LV_HAVE_GENERIC */ |
| 284 | | | |
| 285 | | | #endif /* INCLUDED_volk_32f_exp_32f_u_H */ |
| 286 | | | |
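
For reference, the SSE2 kernels in the listing above follow a Cephes-style `expf()` recipe: the argument is clamped to roughly `[-88.376, +88.376]`, split as `x = g + n*ln(2)` with `n = round(x*log2(e))`, `exp(g)` is approximated by a degree-7 polynomial in `g`, and the result is scaled by `2^n` by writing `n + 127` into the exponent field of an IEEE-754 float (the `_mm_slli_epi32(..., 23)` step). The scalar sketch below restates that recipe in plain C with the same constants, so the vector code can be read step by step; `ref_expf` and the small `main()` driver are illustrative names for this sketch only and are not part of VOLK.

```c
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Scalar restatement of the kernel's algorithm (illustrative only). */
static float ref_expf(float x)
{
    /* Clamp to the same range the kernel uses (exp_hi / exp_lo). */
    if (x > 88.3762626647949f)
        x = 88.3762626647949f;
    if (x < -88.3762626647949f)
        x = -88.3762626647949f;

    /* Range reduction: x = g + n*ln(2) with n = round(x*log2(e)).
     * The kernel rounds by truncating x*log2(e) + 0.5 and correcting
     * when truncation went the wrong way; do the same here. */
    float fx = x * 1.44269504088896341f + 0.5f;
    float n = (float)(int32_t)fx; /* truncate toward zero */
    if (n > fx)
        n -= 1.0f; /* now n == floor(fx) */

    /* Subtract n*ln(2) in two pieces (exp_C1 + exp_C2 == ln(2) to
     * extra precision) so the reduced argument g stays accurate. */
    const float exp_C1 = 0.693359375f;
    const float exp_C2 = -2.12194440e-4f;
    float g = (x - n * exp_C1) - n * exp_C2;

    /* Degree-7 polynomial approximation of exp(g) on the reduced
     * range, using the exp_p0..exp_p5 coefficients from the listing. */
    float z = g * g;
    float y = 1.9875691500e-4f;   /* exp_p0 */
    y = y * g + 1.3981999507e-3f; /* exp_p1 */
    y = y * g + 8.3334519073e-3f; /* exp_p2 */
    y = y * g + 4.1665795894e-2f; /* exp_p3 */
    y = y * g + 1.6666665459e-1f; /* exp_p4 */
    y = y * g + 5.0000001201e-1f; /* exp_p5 */
    y = y * z + g + 1.0f;

    /* Scale by 2^n: place n + 127 in the exponent field of an
     * IEEE-754 float, mirroring _mm_slli_epi32(..., 23). */
    int32_t bits = ((int32_t)n + 127) << 23;
    float pow2n;
    memcpy(&pow2n, &bits, sizeof(pow2n));

    return y * pow2n;
}

int main(void)
{
    const float xs[] = { -10.0f, -1.0f, 0.0f, 0.5f, 1.0f, 10.0f };
    for (unsigned int i = 0; i < sizeof(xs) / sizeof(xs[0]); ++i) {
        printf("x = %6.2f   sketch = %.7e   libm expf = %.7e\n",
               xs[i], ref_expf(xs[i]), expf(xs[i]));
    }
    return 0;
}
```

Splitting `ln(2)` into `exp_C1 + exp_C2` keeps `g` accurate even though `n*ln(2)` is computed in single precision, which is why the subtraction is done in two steps both here and in the kernels. Inputs left over when `num_points` is not a multiple of 4 fall through to the kernels' tail loop, which simply calls `expf()` from libm.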