GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_s32f_convert_32i.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	157	171	91.8%
Functions:	8	8	100.0%
Branches:	40	54	74.1%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_s32f_convert_32i
    
       *
    
       * \b Overview
    
       *
    
       * Converts a floating point number to a 32-bit integer after applying a
    
       * scaling factor.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_s32f_convert_32i(int32_t* outputVector, const float* inputVector, const
    
       * float scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li inputVector: the input vector of floats.
    
       * \li scalar: The value multiplied against each point in the input buffer.
    
       * \li num_points: The number of data points.
    
       *
    
       * \b Outputs
    
       * \li outputVector: The output vector.
    
       *
    
       * \b Example
    
       * Convert floats from [-1,1] to integers with a scale of 5 to maintain smallest delta
    
       * \code
    
       *  int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   int32_t* out = (int32_t*)volk_malloc(sizeof(int32_t)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
    
       *   }
    
       *
    
       *   // Normalize by the smallest delta (0.2 in this example)
    
       *   float scale = 5.f;
    
       *
    
       *   volk_32f_s32f_convert_32i(out, increasing, scale, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %i\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
    
      #define INCLUDED_volk_32f_s32f_convert_32i_u_H
    
      #include <inttypes.h>
    
      #include <limits.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_s32f_convert_32i_u_avx(int32_t* outputVector,
    
                                                         const float* inputVector,
    
                                                         const float scalar,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* inputVectorPtr = (const float*)inputVector;
    
      2
          int32_t* outputVectorPtr = outputVector;
    
      2
          float min_val = INT_MIN;
    
      2
          float max_val = (uint32_t)INT_MAX + 1;
    
          float r;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
          __m256 inputVal1;
    
          __m256i intInputVal1;
    
      2
          __m256 vmin_val = _mm256_set1_ps(min_val);
    
      2
          __m256 vmax_val = _mm256_set1_ps(max_val);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              inputVal1 = _mm256_loadu_ps(inputVectorPtr);
    
      32766
              inputVectorPtr += 8;
    
      98298
              inputVal1 = _mm256_max_ps(
    
                  _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    
      32766
              intInputVal1 = _mm256_cvtps_epi32(inputVal1);
    
              _mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1);
    
      32766
              outputVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              r = inputVector[number] * scalar;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.

      14
              if (r > max_val)
    
      ✗
                  r = max_val;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.

      14
              else if (r < min_val)
    
      ✗
                  r = min_val;
    
      14
              outputVector[number] = (int32_t)rintf(r);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector,
    
                                                          const float* inputVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* inputVectorPtr = (const float*)inputVector;
    
      2
          int32_t* outputVectorPtr = outputVector;
    
      2
          float min_val = INT_MIN;
    
      2
          float max_val = (uint32_t)INT_MAX + 1;
    
          float r;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
          __m128 inputVal1;
    
          __m128i intInputVal1;
    
      2
          __m128 vmin_val = _mm_set_ps1(min_val);
    
      2
          __m128 vmax_val = _mm_set_ps1(max_val);
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              inputVal1 = _mm_loadu_ps(inputVectorPtr);
    
      65534
              inputVectorPtr += 4;
    
              inputVal1 =
    
      196602
                  _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    
      65534
              intInputVal1 = _mm_cvtps_epi32(inputVal1);
    
              _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
    
      65534
              outputVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              r = inputVector[number] * scalar;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              if (r > max_val)
    
      ✗
                  r = max_val;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              else if (r < min_val)
    
      ✗
                  r = min_val;
    
      6
              outputVector[number] = (int32_t)rintf(r);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector,
    
                                                         const float* inputVector,
    
                                                         const float scalar,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* inputVectorPtr = (const float*)inputVector;
    
      2
          int32_t* outputVectorPtr = outputVector;
    
      2
          float min_val = INT_MIN;
    
      2
          float max_val = (uint32_t)INT_MAX + 1;
    
          float r;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
          __m128 ret;
    
      2
          __m128 vmin_val = _mm_set_ps1(min_val);
    
      2
          __m128 vmax_val = _mm_set_ps1(max_val);
    
          __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              ret = _mm_loadu_ps(inputVectorPtr);
    
      65534
              inputVectorPtr += 4;
    
      196602
              ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
    
              _mm_store_ps(outputFloatBuffer, ret);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              r = inputVector[number] * scalar;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              if (r > max_val)
    
      ✗
                  r = max_val;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              else if (r < min_val)
    
      ✗
                  r = min_val;
    
      6
              outputVector[number] = (int32_t)rintf(r);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      4
      static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector,
    
                                                           const float* inputVector,
    
                                                           const float scalar,
    
                                                           unsigned int num_points)
    
      {
    
      4
          int32_t* outputVectorPtr = outputVector;
    
      4
          const float* inputVectorPtr = inputVector;
    
      4
          const float min_val = (float)INT_MIN;
    
      4
          const float max_val = (float)((uint32_t)INT_MAX + 1);
    
        2/2✓ Branch 0 taken 524284 times.
✓ Branch 1 taken 4 times.

      524288
          for (unsigned int number = 0; number < num_points; number++) {
    
      524284
              const float r = *inputVectorPtr++ * scalar;
    
              int s;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 524284 times.

      524284
              if (r >= max_val)
    
      ✗
                  s = INT_MAX;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 524284 times.

      524284
              else if (r < min_val)
    
      ✗
                  s = INT_MIN;
    
              else
    
      524284
                  s = (int32_t)rintf(r);
    
      524284
              *outputVectorPtr++ = s;
    
          }
    
      4
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */
    
      #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
    
      #define INCLUDED_volk_32f_s32f_convert_32i_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector,
    
                                                         const float* inputVector,
    
                                                         const float scalar,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* inputVectorPtr = (const float*)inputVector;
    
      2
          int32_t* outputVectorPtr = outputVector;
    
      2
          float min_val = INT_MIN;
    
      2
          float max_val = (uint32_t)INT_MAX + 1;
    
          float r;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
          __m256 inputVal1;
    
          __m256i intInputVal1;
    
      2
          __m256 vmin_val = _mm256_set1_ps(min_val);
    
      2
          __m256 vmax_val = _mm256_set1_ps(max_val);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              inputVal1 = _mm256_load_ps(inputVectorPtr);
    
      32766
              inputVectorPtr += 8;
    
      98298
              inputVal1 = _mm256_max_ps(
    
                  _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    
      32766
              intInputVal1 = _mm256_cvtps_epi32(inputVal1);
    
              _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
    
      32766
              outputVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              r = inputVector[number] * scalar;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.

      14
              if (r > max_val)
    
      ✗
                  r = max_val;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.

      14
              else if (r < min_val)
    
      ✗
                  r = min_val;
    
      14
              outputVector[number] = (int32_t)rintf(r);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector,
    
                                                          const float* inputVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* inputVectorPtr = (const float*)inputVector;
    
      2
          int32_t* outputVectorPtr = outputVector;
    
      2
          float min_val = INT_MIN;
    
      2
          float max_val = (uint32_t)INT_MAX + 1;
    
          float r;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
          __m128 inputVal1;
    
          __m128i intInputVal1;
    
      2
          __m128 vmin_val = _mm_set_ps1(min_val);
    
      2
          __m128 vmax_val = _mm_set_ps1(max_val);
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              inputVal1 = _mm_load_ps(inputVectorPtr);
    
      65534
              inputVectorPtr += 4;
    
              inputVal1 =
    
      196602
                  _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    
      65534
              intInputVal1 = _mm_cvtps_epi32(inputVal1);
    
              _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
    
      65534
              outputVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              r = inputVector[number] * scalar;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              if (r > max_val)
    
      ✗
                  r = max_val;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              else if (r < min_val)
    
      ✗
                  r = min_val;
    
      6
              outputVector[number] = (int32_t)rintf(r);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector,
    
                                                         const float* inputVector,
    
                                                         const float scalar,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* inputVectorPtr = (const float*)inputVector;
    
      2
          int32_t* outputVectorPtr = outputVector;
    
      2
          float min_val = INT_MIN;
    
      2
          float max_val = (uint32_t)INT_MAX + 1;
    
          float r;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
          __m128 ret;
    
      2
          __m128 vmin_val = _mm_set_ps1(min_val);
    
      2
          __m128 vmax_val = _mm_set_ps1(max_val);
    
          __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              ret = _mm_load_ps(inputVectorPtr);
    
      65534
              inputVectorPtr += 4;
    
      196602
              ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
    
              _mm_store_ps(outputFloatBuffer, ret);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
    
      65534
              *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              r = inputVector[number] * scalar;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              if (r > max_val)
    
      ✗
                  r = max_val;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
              else if (r < min_val)
    
      ✗
                  r = min_val;
    
      6
              outputVector[number] = (int32_t)rintf(r);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector,
    
                                                             const float* inputVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          volk_32f_s32f_convert_32i_generic(outputVector, inputVector, scalar, num_points);
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_s32f_convert_32i
12			*
13			* \b Overview
14			*
15			* Converts a floating point number to a 32-bit integer after applying a
16			* scaling factor.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32f_s32f_convert_32i(int32_t* outputVector, const float* inputVector, const
21			* float scalar, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li inputVector: the input vector of floats.
25			* \li scalar: The value multiplied against each point in the input buffer.
26			* \li num_points: The number of data points.
27			*
28			* \b Outputs
29			* \li outputVector: The output vector.
30			*
31			* \b Example
32			* Convert floats from [-1,1] to integers with a scale of 5 to maintain smallest delta
33			* \code
34			* int N = 10;
35			* unsigned int alignment = volk_get_alignment();
36			* float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
37			* int32_t* out = (int32_t*)volk_malloc(sizeof(int32_t)*N, alignment);
38			*
39			* for(unsigned int ii = 0; ii < N; ++ii){
40			* increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
41			* }
42			*
43			* // Normalize by the smallest delta (0.2 in this example)
44			* float scale = 5.f;
45			*
46			* volk_32f_s32f_convert_32i(out, increasing, scale, N);
47			*
48			* for(unsigned int ii = 0; ii < N; ++ii){
49			* printf("out[%u] = %i\n", ii, out[ii]);
50			* }
51			*
52			* volk_free(increasing);
53			* volk_free(out);
54			* \endcode
55			*/
56
57			#ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
58			#define INCLUDED_volk_32f_s32f_convert_32i_u_H
59
60			#include <inttypes.h>
61			#include <limits.h>
62			#include <stdio.h>
63
64			#ifdef LV_HAVE_AVX
65			#include <immintrin.h>
66
67		2	static inline void volk_32f_s32f_convert_32i_u_avx(int32_t* outputVector,
68			const float* inputVector,
69			const float scalar,
70			unsigned int num_points)
71			{
72		2	unsigned int number = 0;
73
74		2	const unsigned int eighthPoints = num_points / 8;
75
76		2	const float* inputVectorPtr = (const float*)inputVector;
77		2	int32_t* outputVectorPtr = outputVector;
78
79		2	float min_val = INT_MIN;
80		2	float max_val = (uint32_t)INT_MAX + 1;
81			float r;
82
83		2	__m256 vScalar = _mm256_set1_ps(scalar);
84			__m256 inputVal1;
85			__m256i intInputVal1;
86		2	__m256 vmin_val = _mm256_set1_ps(min_val);
87		2	__m256 vmax_val = _mm256_set1_ps(max_val);
88
89	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
90		32766	inputVal1 = _mm256_loadu_ps(inputVectorPtr);
91		32766	inputVectorPtr += 8;
92
93		98298	inputVal1 = _mm256_max_ps(
94			_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
95		32766	intInputVal1 = _mm256_cvtps_epi32(inputVal1);
96
97			_mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1);
98		32766	outputVectorPtr += 8;
99			}
100
101		2	number = eighthPoints * 8;
102	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
103		14	r = inputVector[number] * scalar;
104	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 14 times.	14	if (r > max_val)
105		✗	r = max_val;
106	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 14 times.	14	else if (r < min_val)
107		✗	r = min_val;
108		14	outputVector[number] = (int32_t)rintf(r);
109			}
110		2	}
111
112			#endif /* LV_HAVE_AVX */
113
114			#ifdef LV_HAVE_SSE2
115			#include <emmintrin.h>
116
117		2	static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector,
118			const float* inputVector,
119			const float scalar,
120			unsigned int num_points)
121			{
122		2	unsigned int number = 0;
123
124		2	const unsigned int quarterPoints = num_points / 4;
125
126		2	const float* inputVectorPtr = (const float*)inputVector;
127		2	int32_t* outputVectorPtr = outputVector;
128
129		2	float min_val = INT_MIN;
130		2	float max_val = (uint32_t)INT_MAX + 1;
131			float r;
132
133		2	__m128 vScalar = _mm_set_ps1(scalar);
134			__m128 inputVal1;
135			__m128i intInputVal1;
136		2	__m128 vmin_val = _mm_set_ps1(min_val);
137		2	__m128 vmax_val = _mm_set_ps1(max_val);
138
139	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
140		65534	inputVal1 = _mm_loadu_ps(inputVectorPtr);
141		65534	inputVectorPtr += 4;
142
143			inputVal1 =
144		196602	_mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
145		65534	intInputVal1 = _mm_cvtps_epi32(inputVal1);
146
147			_mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
148		65534	outputVectorPtr += 4;
149			}
150
151		2	number = quarterPoints * 4;
152	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
153		6	r = inputVector[number] * scalar;
154	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	if (r > max_val)
155		✗	r = max_val;
156	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	else if (r < min_val)
157		✗	r = min_val;
158		6	outputVector[number] = (int32_t)rintf(r);
159			}
160		2	}
161
162			#endif /* LV_HAVE_SSE2 */
163
164
165			#ifdef LV_HAVE_SSE
166			#include <xmmintrin.h>
167
168		2	static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector,
169			const float* inputVector,
170			const float scalar,
171			unsigned int num_points)
172			{
173		2	unsigned int number = 0;
174
175		2	const unsigned int quarterPoints = num_points / 4;
176
177		2	const float* inputVectorPtr = (const float*)inputVector;
178		2	int32_t* outputVectorPtr = outputVector;
179
180		2	float min_val = INT_MIN;
181		2	float max_val = (uint32_t)INT_MAX + 1;
182			float r;
183
184		2	__m128 vScalar = _mm_set_ps1(scalar);
185			__m128 ret;
186		2	__m128 vmin_val = _mm_set_ps1(min_val);
187		2	__m128 vmax_val = _mm_set_ps1(max_val);
188
189			__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
190
191	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
192		65534	ret = _mm_loadu_ps(inputVectorPtr);
193		65534	inputVectorPtr += 4;
194
195		196602	ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
196
197			_mm_store_ps(outputFloatBuffer, ret);
198		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
199		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
200		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
201		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
202			}
203
204		2	number = quarterPoints * 4;
205	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
206		6	r = inputVector[number] * scalar;
207	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	if (r > max_val)
208		✗	r = max_val;
209	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	else if (r < min_val)
210		✗	r = min_val;
211		6	outputVector[number] = (int32_t)rintf(r);
212			}
213		2	}
214
215			#endif /* LV_HAVE_SSE */
216
217
218			#ifdef LV_HAVE_GENERIC
219
220		4	static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector,
221			const float* inputVector,
222			const float scalar,
223			unsigned int num_points)
224			{
225		4	int32_t* outputVectorPtr = outputVector;
226		4	const float* inputVectorPtr = inputVector;
227		4	const float min_val = (float)INT_MIN;
228		4	const float max_val = (float)((uint32_t)INT_MAX + 1);
229
230	2/2 ✓ Branch 0 taken 524284 times. ✓ Branch 1 taken 4 times.	524288	for (unsigned int number = 0; number < num_points; number++) {
231		524284	const float r = *inputVectorPtr++ * scalar;
232			int s;
233	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 524284 times.	524284	if (r >= max_val)
234		✗	s = INT_MAX;
235	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 524284 times.	524284	else if (r < min_val)
236		✗	s = INT_MIN;
237			else
238		524284	s = (int32_t)rintf(r);
239		524284	*outputVectorPtr++ = s;
240			}
241		4	}
242
243			#endif /* LV_HAVE_GENERIC */
244
245
246			#endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */
247			#ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
248			#define INCLUDED_volk_32f_s32f_convert_32i_a_H
249
250			#include <inttypes.h>
251			#include <stdio.h>
252			#include <volk/volk_common.h>
253
254			#ifdef LV_HAVE_AVX
255			#include <immintrin.h>
256
257		2	static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector,
258			const float* inputVector,
259			const float scalar,
260			unsigned int num_points)
261			{
262		2	unsigned int number = 0;
263
264		2	const unsigned int eighthPoints = num_points / 8;
265
266		2	const float* inputVectorPtr = (const float*)inputVector;
267		2	int32_t* outputVectorPtr = outputVector;
268
269		2	float min_val = INT_MIN;
270		2	float max_val = (uint32_t)INT_MAX + 1;
271			float r;
272
273		2	__m256 vScalar = _mm256_set1_ps(scalar);
274			__m256 inputVal1;
275			__m256i intInputVal1;
276		2	__m256 vmin_val = _mm256_set1_ps(min_val);
277		2	__m256 vmax_val = _mm256_set1_ps(max_val);
278
279	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
280		32766	inputVal1 = _mm256_load_ps(inputVectorPtr);
281		32766	inputVectorPtr += 8;
282
283		98298	inputVal1 = _mm256_max_ps(
284			_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
285		32766	intInputVal1 = _mm256_cvtps_epi32(inputVal1);
286
287			_mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
288		32766	outputVectorPtr += 8;
289			}
290
291		2	number = eighthPoints * 8;
292	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
293		14	r = inputVector[number] * scalar;
294	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 14 times.	14	if (r > max_val)
295		✗	r = max_val;
296	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 14 times.	14	else if (r < min_val)
297		✗	r = min_val;
298		14	outputVector[number] = (int32_t)rintf(r);
299			}
300		2	}
301
302			#endif /* LV_HAVE_AVX */
303
304
305			#ifdef LV_HAVE_SSE2
306			#include <emmintrin.h>
307
308		2	static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector,
309			const float* inputVector,
310			const float scalar,
311			unsigned int num_points)
312			{
313		2	unsigned int number = 0;
314
315		2	const unsigned int quarterPoints = num_points / 4;
316
317		2	const float* inputVectorPtr = (const float*)inputVector;
318		2	int32_t* outputVectorPtr = outputVector;
319
320		2	float min_val = INT_MIN;
321		2	float max_val = (uint32_t)INT_MAX + 1;
322			float r;
323
324		2	__m128 vScalar = _mm_set_ps1(scalar);
325			__m128 inputVal1;
326			__m128i intInputVal1;
327		2	__m128 vmin_val = _mm_set_ps1(min_val);
328		2	__m128 vmax_val = _mm_set_ps1(max_val);
329
330	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
331		65534	inputVal1 = _mm_load_ps(inputVectorPtr);
332		65534	inputVectorPtr += 4;
333
334			inputVal1 =
335		196602	_mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
336		65534	intInputVal1 = _mm_cvtps_epi32(inputVal1);
337
338			_mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
339		65534	outputVectorPtr += 4;
340			}
341
342		2	number = quarterPoints * 4;
343	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
344		6	r = inputVector[number] * scalar;
345	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	if (r > max_val)
346		✗	r = max_val;
347	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	else if (r < min_val)
348		✗	r = min_val;
349		6	outputVector[number] = (int32_t)rintf(r);
350			}
351		2	}
352
353			#endif /* LV_HAVE_SSE2 */
354
355
356			#ifdef LV_HAVE_SSE
357			#include <xmmintrin.h>
358
359		2	static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector,
360			const float* inputVector,
361			const float scalar,
362			unsigned int num_points)
363			{
364		2	unsigned int number = 0;
365
366		2	const unsigned int quarterPoints = num_points / 4;
367
368		2	const float* inputVectorPtr = (const float*)inputVector;
369		2	int32_t* outputVectorPtr = outputVector;
370
371		2	float min_val = INT_MIN;
372		2	float max_val = (uint32_t)INT_MAX + 1;
373			float r;
374
375		2	__m128 vScalar = _mm_set_ps1(scalar);
376			__m128 ret;
377		2	__m128 vmin_val = _mm_set_ps1(min_val);
378		2	__m128 vmax_val = _mm_set_ps1(max_val);
379
380			__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
381
382	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
383		65534	ret = _mm_load_ps(inputVectorPtr);
384		65534	inputVectorPtr += 4;
385
386		196602	ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
387
388			_mm_store_ps(outputFloatBuffer, ret);
389		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
390		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
391		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
392		65534	*outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
393			}
394
395		2	number = quarterPoints * 4;
396	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
397		6	r = inputVector[number] * scalar;
398	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	if (r > max_val)
399		✗	r = max_val;
400	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	else if (r < min_val)
401		✗	r = min_val;
402		6	outputVector[number] = (int32_t)rintf(r);
403			}
404		2	}
405
406			#endif /* LV_HAVE_SSE */
407
408
409			#ifdef LV_HAVE_GENERIC
410
411		2	static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector,
412			const float* inputVector,
413			const float scalar,
414			unsigned int num_points)
415			{
416		2	volk_32f_s32f_convert_32i_generic(outputVector, inputVector, scalar, num_points);
417		2	}
418
419			#endif /* LV_HAVE_GENERIC */
420
421			#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */
422