GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	158	170	92.9%
Functions:	4	4	100.0%
Branches:	60	72	83.3%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_s32f_32f_fm_detect_32f
    
       *
    
       * \b Overview
    
       *
    
       * Performs FM-detect differentiation on the input vector and stores
    
       * the results in the output vector.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_s32f_32f_fm_detect_32f(float* outputVector, const float* inputVector,
    
       * const float bound, float* saveValue, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li inputVector: The input vector containing phase data (must be on the interval
    
       * (-bound, bound]). \li bound: The interval that the input phase data is in, which is
    
       * used to modulo the differentiation. \li saveValue: A pointer to a float which contains
    
       * the phase value of the sample before the first input sample. \li num_points The number
    
       * of data points.
    
       *
    
       * \b Outputs
    
       * \li outputVector: The vector where the results will be stored.
    
       *
    
       * \b Example
    
       * \code
    
       * int N = 10000;
    
       *
    
       * <FIXME>
    
       *
    
       * volk_32f_s32f_32f_fm_detect_32f();
    
       *
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
    
      #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_s32f_32f_fm_detect_32f_a_avx(float* outputVector,
    
                                                               const float* inputVector,
    
                                                               const float bound,
    
                                                               float* saveValue,
    
                                                               unsigned int num_points)
    
      {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (num_points < 1) {
    
      ✗
              return;
    
          }
    
      2
          unsigned int number = 1;
    
      2
          unsigned int j = 0;
    
          // num_points-1 keeps Fedora 7's gcc from crashing...
    
          // num_points won't work.  :(
    
      2
          const unsigned int eighthPoints = (num_points - 1) / 8;
    
      2
          float* outPtr = outputVector;
    
      2
          const float* inPtr = inputVector;
    
      2
          __m256 upperBound = _mm256_set1_ps(bound);
    
      2
          __m256 lowerBound = _mm256_set1_ps(-bound);
    
          __m256 next3old1;
    
          __m256 next4;
    
          __m256 boundAdjust;
    
      2
          __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above.
    
      2
          __m256 negBoundAdjust = _mm256_set1_ps(2 * bound);  // Add when we're below.
    
          // Do the first 8 by hand since we're going in from the saveValue:
    
      2
          *outPtr = *inPtr - *saveValue;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr > bound)
    
      ✗
              *outPtr -= 2 * bound;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr < -bound)
    
      ✗
              *outPtr += 2 * bound;
    
      2
          inPtr++;
    
      2
          outPtr++;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
    
      14
              *outPtr = *(inPtr) - *(inPtr - 1);
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.

      14
              if (*outPtr > bound)
    
      1
                  *outPtr -= 2 * bound;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.

      14
              if (*outPtr < -bound)
    
      1
                  *outPtr += 2 * bound;
    
      14
              inPtr++;
    
      14
              outPtr++;
    
          }
    
        2/2✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.

      32766
          for (; number < eighthPoints; number++) {
    
              // Load data
    
      65528
              next3old1 = _mm256_loadu_ps((float*)(inPtr - 1));
    
      32764
              next4 = _mm256_load_ps(inPtr);
    
      32764
              inPtr += 8;
    
              // Subtract and store:
    
      32764
              next3old1 = _mm256_sub_ps(next4, next3old1);
    
              // Bound:
    
      32764
              boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
    
      32764
              boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
    
      32764
              next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
    
      32764
              next4 = _mm256_and_ps(next4, negBoundAdjust);
    
      32764
              boundAdjust = _mm256_or_ps(next4, boundAdjust);
    
              // Make sure we're in the bounding interval:
    
      32764
              next3old1 = _mm256_add_ps(next3old1, boundAdjust);
    
              _mm256_store_ps(outPtr, next3old1); // Store the results back into the output
    
      32764
              outPtr += 8;
    
          }
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
    
      14
               number++) {
    
      14
              *outPtr = *(inPtr) - *(inPtr - 1);
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.

      14
              if (*outPtr > bound)
    
      4
                  *outPtr -= 2 * bound;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.

      14
              if (*outPtr < -bound)
    
      1
                  *outPtr += 2 * bound;
    
      14
              inPtr++;
    
      14
              outPtr++;
    
          }
    
      2
          *saveValue = inputVector[num_points - 1];
    
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector,
    
                                                               const float* inputVector,
    
                                                               const float bound,
    
                                                               float* saveValue,
    
                                                               unsigned int num_points)
    
      {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (num_points < 1) {
    
      ✗
              return;
    
          }
    
      2
          unsigned int number = 1;
    
      2
          unsigned int j = 0;
    
          // num_points-1 keeps Fedora 7's gcc from crashing...
    
          // num_points won't work.  :(
    
      2
          const unsigned int quarterPoints = (num_points - 1) / 4;
    
      2
          float* outPtr = outputVector;
    
      2
          const float* inPtr = inputVector;
    
      2
          __m128 upperBound = _mm_set_ps1(bound);
    
      2
          __m128 lowerBound = _mm_set_ps1(-bound);
    
          __m128 next3old1;
    
          __m128 next4;
    
          __m128 boundAdjust;
    
      2
          __m128 posBoundAdjust = _mm_set_ps1(-2 * bound); // Subtract when we're above.
    
      2
          __m128 negBoundAdjust = _mm_set_ps1(2 * bound);  // Add when we're below.
    
          // Do the first 4 by hand since we're going in from the saveValue:
    
      2
          *outPtr = *inPtr - *saveValue;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr > bound)
    
      ✗
              *outPtr -= 2 * bound;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr < -bound)
    
      ✗
              *outPtr += 2 * bound;
    
      2
          inPtr++;
    
      2
          outPtr++;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) {
    
      6
              *outPtr = *(inPtr) - *(inPtr - 1);
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.

      6
              if (*outPtr > bound)
    
      1
                  *outPtr -= 2 * bound;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.

      6
              if (*outPtr < -bound)
    
      1
                  *outPtr += 2 * bound;
    
      6
              inPtr++;
    
      6
              outPtr++;
    
          }
    
        2/2✓ Branch 0 taken 65532 times.
✓ Branch 1 taken 2 times.

      65534
          for (; number < quarterPoints; number++) {
    
              // Load data
    
      131064
              next3old1 = _mm_loadu_ps((float*)(inPtr - 1));
    
      65532
              next4 = _mm_load_ps(inPtr);
    
      65532
              inPtr += 4;
    
              // Subtract and store:
    
      65532
              next3old1 = _mm_sub_ps(next4, next3old1);
    
              // Bound:
    
      65532
              boundAdjust = _mm_cmpgt_ps(next3old1, upperBound);
    
      65532
              boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust);
    
      65532
              next4 = _mm_cmplt_ps(next3old1, lowerBound);
    
      65532
              next4 = _mm_and_ps(next4, negBoundAdjust);
    
      65532
              boundAdjust = _mm_or_ps(next4, boundAdjust);
    
              // Make sure we're in the bounding interval:
    
      65532
              next3old1 = _mm_add_ps(next3old1, boundAdjust);
    
              _mm_store_ps(outPtr, next3old1); // Store the results back into the output
    
      65532
              outPtr += 4;
    
          }
    
      2
          for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints));
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
               number < num_points;
    
      6
               number++) {
    
      6
              *outPtr = *(inPtr) - *(inPtr - 1);
    
        2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.

      6
              if (*outPtr > bound)
    
      2
                  *outPtr -= 2 * bound;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.

      6
              if (*outPtr < -bound)
    
      1
                  *outPtr += 2 * bound;
    
      6
              inPtr++;
    
      6
              outPtr++;
    
          }
    
      2
          *saveValue = inputVector[num_points - 1];
    
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_s32f_32f_fm_detect_32f_generic(float* outputVector,
    
                                                                 const float* inputVector,
    
                                                                 const float bound,
    
                                                                 float* saveValue,
    
                                                                 unsigned int num_points)
    
      {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (num_points < 1) {
    
      ✗
              return;
    
          }
    
      2
          unsigned int number = 0;
    
      2
          float* outPtr = outputVector;
    
      2
          const float* inPtr = inputVector;
    
          // Do the first 1 by hand since we're going in from the saveValue:
    
      2
          *outPtr = *inPtr - *saveValue;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr > bound)
    
      ✗
              *outPtr -= 2 * bound;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr < -bound)
    
      ✗
              *outPtr += 2 * bound;
    
      2
          inPtr++;
    
      2
          outPtr++;
    
        2/2✓ Branch 0 taken 262140 times.
✓ Branch 1 taken 2 times.

      262142
          for (number = 1; number < num_points; number++) {
    
      262140
              *outPtr = *(inPtr) - *(inPtr - 1);
    
        2/2✓ Branch 0 taken 32572 times.
✓ Branch 1 taken 229568 times.

      262140
              if (*outPtr > bound)
    
      32572
                  *outPtr -= 2 * bound;
    
        2/2✓ Branch 0 taken 32879 times.
✓ Branch 1 taken 229261 times.

      262140
              if (*outPtr < -bound)
    
      32879
                  *outPtr += 2 * bound;
    
      262140
              inPtr++;
    
      262140
              outPtr++;
    
          }
    
      2
          *saveValue = inputVector[num_points - 1];
    
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */
    
      #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
    
      #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_s32f_32f_fm_detect_32f_u_avx(float* outputVector,
    
                                                               const float* inputVector,
    
                                                               const float bound,
    
                                                               float* saveValue,
    
                                                               unsigned int num_points)
    
      {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (num_points < 1) {
    
      ✗
              return;
    
          }
    
      2
          unsigned int number = 1;
    
      2
          unsigned int j = 0;
    
          // num_points-1 keeps Fedora 7's gcc from crashing...
    
          // num_points won't work.  :(
    
      2
          const unsigned int eighthPoints = (num_points - 1) / 8;
    
      2
          float* outPtr = outputVector;
    
      2
          const float* inPtr = inputVector;
    
      2
          __m256 upperBound = _mm256_set1_ps(bound);
    
      2
          __m256 lowerBound = _mm256_set1_ps(-bound);
    
          __m256 next3old1;
    
          __m256 next4;
    
          __m256 boundAdjust;
    
      2
          __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above.
    
      2
          __m256 negBoundAdjust = _mm256_set1_ps(2 * bound);  // Add when we're below.
    
          // Do the first 8 by hand since we're going in from the saveValue:
    
      2
          *outPtr = *inPtr - *saveValue;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr > bound)
    
      ✗
              *outPtr -= 2 * bound;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.

      2
          if (*outPtr < -bound)
    
      ✗
              *outPtr += 2 * bound;
    
      2
          inPtr++;
    
      2
          outPtr++;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
    
      14
              *outPtr = *(inPtr) - *(inPtr - 1);
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.

      14
              if (*outPtr > bound)
    
      1
                  *outPtr -= 2 * bound;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.

      14
              if (*outPtr < -bound)
    
      1
                  *outPtr += 2 * bound;
    
      14
              inPtr++;
    
      14
              outPtr++;
    
          }
    
        2/2✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.

      32766
          for (; number < eighthPoints; number++) {
    
              // Load data
    
      65528
              next3old1 = _mm256_loadu_ps((float*)(inPtr - 1));
    
      32764
              next4 = _mm256_loadu_ps(inPtr);
    
      32764
              inPtr += 8;
    
              // Subtract and store:
    
      32764
              next3old1 = _mm256_sub_ps(next4, next3old1);
    
              // Bound:
    
      32764
              boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
    
      32764
              boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
    
      32764
              next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
    
      32764
              next4 = _mm256_and_ps(next4, negBoundAdjust);
    
      32764
              boundAdjust = _mm256_or_ps(next4, boundAdjust);
    
              // Make sure we're in the bounding interval:
    
      32764
              next3old1 = _mm256_add_ps(next3old1, boundAdjust);
    
              _mm256_storeu_ps(outPtr, next3old1); // Store the results back into the output
    
      32764
              outPtr += 8;
    
          }
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
    
      14
               number++) {
    
      14
              *outPtr = *(inPtr) - *(inPtr - 1);
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.

      14
              if (*outPtr > bound)
    
      4
                  *outPtr -= 2 * bound;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.

      14
              if (*outPtr < -bound)
    
      1
                  *outPtr += 2 * bound;
    
      14
              inPtr++;
    
      14
              outPtr++;
    
          }
    
      2
          *saveValue = inputVector[num_points - 1];
    
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_s32f_32f_fm_detect_32f
12			*
13			* \b Overview
14			*
15			* Performs FM-detect differentiation on the input vector and stores
16			* the results in the output vector.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32f_s32f_32f_fm_detect_32f(float* outputVector, const float* inputVector,
21			* const float bound, float* saveValue, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li inputVector: The input vector containing phase data (must be on the interval
25			* (-bound, bound]). \li bound: The interval that the input phase data is in, which is
26			* used to modulo the differentiation. \li saveValue: A pointer to a float which contains
27			* the phase value of the sample before the first input sample. \li num_points The number
28			* of data points.
29			*
30			* \b Outputs
31			* \li outputVector: The vector where the results will be stored.
32			*
33			* \b Example
34			* \code
35			* int N = 10000;
36			*
37			* <FIXME>
38			*
39			* volk_32f_s32f_32f_fm_detect_32f();
40			*
41			* \endcode
42			*/
43
44			#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
45			#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
46
47			#include <inttypes.h>
48			#include <stdio.h>
49
50			#ifdef LV_HAVE_AVX
51			#include <immintrin.h>
52
53		2	static inline void volk_32f_s32f_32f_fm_detect_32f_a_avx(float* outputVector,
54			const float* inputVector,
55			const float bound,
56			float* saveValue,
57			unsigned int num_points)
58			{
59	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (num_points < 1) {
60		✗	return;
61			}
62		2	unsigned int number = 1;
63		2	unsigned int j = 0;
64			// num_points-1 keeps Fedora 7's gcc from crashing...
65			// num_points won't work. :(
66		2	const unsigned int eighthPoints = (num_points - 1) / 8;
67
68		2	float* outPtr = outputVector;
69		2	const float* inPtr = inputVector;
70		2	__m256 upperBound = _mm256_set1_ps(bound);
71		2	__m256 lowerBound = _mm256_set1_ps(-bound);
72			__m256 next3old1;
73			__m256 next4;
74			__m256 boundAdjust;
75		2	__m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above.
76		2	__m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below.
77			// Do the first 8 by hand since we're going in from the saveValue:
78		2	*outPtr = *inPtr - *saveValue;
79	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr > bound)
80		✗	*outPtr -= 2 * bound;
81	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr < -bound)
82		✗	*outPtr += 2 * bound;
83		2	inPtr++;
84		2	outPtr++;
85	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
86		14	*outPtr = *(inPtr) - *(inPtr - 1);
87	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 13 times.	14	if (*outPtr > bound)
88		1	*outPtr -= 2 * bound;
89	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 13 times.	14	if (*outPtr < -bound)
90		1	*outPtr += 2 * bound;
91		14	inPtr++;
92		14	outPtr++;
93			}
94
95	2/2 ✓ Branch 0 taken 32764 times. ✓ Branch 1 taken 2 times.	32766	for (; number < eighthPoints; number++) {
96			// Load data
97		65528	next3old1 = _mm256_loadu_ps((float*)(inPtr - 1));
98		32764	next4 = _mm256_load_ps(inPtr);
99		32764	inPtr += 8;
100			// Subtract and store:
101		32764	next3old1 = _mm256_sub_ps(next4, next3old1);
102			// Bound:
103		32764	boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
104		32764	boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
105		32764	next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
106		32764	next4 = _mm256_and_ps(next4, negBoundAdjust);
107		32764	boundAdjust = _mm256_or_ps(next4, boundAdjust);
108			// Make sure we're in the bounding interval:
109		32764	next3old1 = _mm256_add_ps(next3old1, boundAdjust);
110			_mm256_store_ps(outPtr, next3old1); // Store the results back into the output
111		32764	outPtr += 8;
112			}
113
114	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
115		14	number++) {
116		14	*outPtr = *(inPtr) - *(inPtr - 1);
117	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 10 times.	14	if (*outPtr > bound)
118		4	*outPtr -= 2 * bound;
119	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 13 times.	14	if (*outPtr < -bound)
120		1	*outPtr += 2 * bound;
121		14	inPtr++;
122		14	outPtr++;
123			}
124
125		2	*saveValue = inputVector[num_points - 1];
126			}
127			#endif /* LV_HAVE_AVX */
128
129
130			#ifdef LV_HAVE_SSE
131			#include <xmmintrin.h>
132
133		2	static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector,
134			const float* inputVector,
135			const float bound,
136			float* saveValue,
137			unsigned int num_points)
138			{
139	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (num_points < 1) {
140		✗	return;
141			}
142		2	unsigned int number = 1;
143		2	unsigned int j = 0;
144			// num_points-1 keeps Fedora 7's gcc from crashing...
145			// num_points won't work. :(
146		2	const unsigned int quarterPoints = (num_points - 1) / 4;
147
148		2	float* outPtr = outputVector;
149		2	const float* inPtr = inputVector;
150		2	__m128 upperBound = _mm_set_ps1(bound);
151		2	__m128 lowerBound = _mm_set_ps1(-bound);
152			__m128 next3old1;
153			__m128 next4;
154			__m128 boundAdjust;
155		2	__m128 posBoundAdjust = _mm_set_ps1(-2 * bound); // Subtract when we're above.
156		2	__m128 negBoundAdjust = _mm_set_ps1(2 * bound); // Add when we're below.
157			// Do the first 4 by hand since we're going in from the saveValue:
158		2	*outPtr = *inPtr - *saveValue;
159	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr > bound)
160		✗	*outPtr -= 2 * bound;
161	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr < -bound)
162		✗	*outPtr += 2 * bound;
163		2	inPtr++;
164		2	outPtr++;
165	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) {
166		6	*outPtr = *(inPtr) - *(inPtr - 1);
167	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 5 times.	6	if (*outPtr > bound)
168		1	*outPtr -= 2 * bound;
169	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 5 times.	6	if (*outPtr < -bound)
170		1	*outPtr += 2 * bound;
171		6	inPtr++;
172		6	outPtr++;
173			}
174
175	2/2 ✓ Branch 0 taken 65532 times. ✓ Branch 1 taken 2 times.	65534	for (; number < quarterPoints; number++) {
176			// Load data
177		131064	next3old1 = _mm_loadu_ps((float*)(inPtr - 1));
178		65532	next4 = _mm_load_ps(inPtr);
179		65532	inPtr += 4;
180			// Subtract and store:
181		65532	next3old1 = _mm_sub_ps(next4, next3old1);
182			// Bound:
183		65532	boundAdjust = _mm_cmpgt_ps(next3old1, upperBound);
184		65532	boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust);
185		65532	next4 = _mm_cmplt_ps(next3old1, lowerBound);
186		65532	next4 = _mm_and_ps(next4, negBoundAdjust);
187		65532	boundAdjust = _mm_or_ps(next4, boundAdjust);
188			// Make sure we're in the bounding interval:
189		65532	next3old1 = _mm_add_ps(next3old1, boundAdjust);
190			_mm_store_ps(outPtr, next3old1); // Store the results back into the output
191		65532	outPtr += 4;
192			}
193
194		2	for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints));
195	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	number < num_points;
196		6	number++) {
197		6	*outPtr = *(inPtr) - *(inPtr - 1);
198	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 4 times.	6	if (*outPtr > bound)
199		2	*outPtr -= 2 * bound;
200	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 5 times.	6	if (*outPtr < -bound)
201		1	*outPtr += 2 * bound;
202		6	inPtr++;
203		6	outPtr++;
204			}
205
206		2	*saveValue = inputVector[num_points - 1];
207			}
208			#endif /* LV_HAVE_SSE */
209
210			#ifdef LV_HAVE_GENERIC
211
212		2	static inline void volk_32f_s32f_32f_fm_detect_32f_generic(float* outputVector,
213			const float* inputVector,
214			const float bound,
215			float* saveValue,
216			unsigned int num_points)
217			{
218	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (num_points < 1) {
219		✗	return;
220			}
221		2	unsigned int number = 0;
222		2	float* outPtr = outputVector;
223		2	const float* inPtr = inputVector;
224
225			// Do the first 1 by hand since we're going in from the saveValue:
226		2	*outPtr = *inPtr - *saveValue;
227	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr > bound)
228		✗	*outPtr -= 2 * bound;
229	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr < -bound)
230		✗	*outPtr += 2 * bound;
231		2	inPtr++;
232		2	outPtr++;
233
234	2/2 ✓ Branch 0 taken 262140 times. ✓ Branch 1 taken 2 times.	262142	for (number = 1; number < num_points; number++) {
235		262140	*outPtr = *(inPtr) - *(inPtr - 1);
236	2/2 ✓ Branch 0 taken 32572 times. ✓ Branch 1 taken 229568 times.	262140	if (*outPtr > bound)
237		32572	*outPtr -= 2 * bound;
238	2/2 ✓ Branch 0 taken 32879 times. ✓ Branch 1 taken 229261 times.	262140	if (*outPtr < -bound)
239		32879	*outPtr += 2 * bound;
240		262140	inPtr++;
241		262140	outPtr++;
242			}
243
244		2	*saveValue = inputVector[num_points - 1];
245			}
246			#endif /* LV_HAVE_GENERIC */
247
248
249			#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */
250
251
252			#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
253			#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
254
255			#include <inttypes.h>
256			#include <stdio.h>
257
258			#ifdef LV_HAVE_AVX
259			#include <immintrin.h>
260
261		2	static inline void volk_32f_s32f_32f_fm_detect_32f_u_avx(float* outputVector,
262			const float* inputVector,
263			const float bound,
264			float* saveValue,
265			unsigned int num_points)
266			{
267	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (num_points < 1) {
268		✗	return;
269			}
270		2	unsigned int number = 1;
271		2	unsigned int j = 0;
272			// num_points-1 keeps Fedora 7's gcc from crashing...
273			// num_points won't work. :(
274		2	const unsigned int eighthPoints = (num_points - 1) / 8;
275
276		2	float* outPtr = outputVector;
277		2	const float* inPtr = inputVector;
278		2	__m256 upperBound = _mm256_set1_ps(bound);
279		2	__m256 lowerBound = _mm256_set1_ps(-bound);
280			__m256 next3old1;
281			__m256 next4;
282			__m256 boundAdjust;
283		2	__m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above.
284		2	__m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below.
285			// Do the first 8 by hand since we're going in from the saveValue:
286		2	*outPtr = *inPtr - *saveValue;
287	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr > bound)
288		✗	*outPtr -= 2 * bound;
289	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 2 times.	2	if (*outPtr < -bound)
290		✗	*outPtr += 2 * bound;
291		2	inPtr++;
292		2	outPtr++;
293	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
294		14	*outPtr = *(inPtr) - *(inPtr - 1);
295	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 13 times.	14	if (*outPtr > bound)
296		1	*outPtr -= 2 * bound;
297	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 13 times.	14	if (*outPtr < -bound)
298		1	*outPtr += 2 * bound;
299		14	inPtr++;
300		14	outPtr++;
301			}
302
303	2/2 ✓ Branch 0 taken 32764 times. ✓ Branch 1 taken 2 times.	32766	for (; number < eighthPoints; number++) {
304			// Load data
305		65528	next3old1 = _mm256_loadu_ps((float*)(inPtr - 1));
306		32764	next4 = _mm256_loadu_ps(inPtr);
307		32764	inPtr += 8;
308			// Subtract and store:
309		32764	next3old1 = _mm256_sub_ps(next4, next3old1);
310			// Bound:
311		32764	boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
312		32764	boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
313		32764	next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
314		32764	next4 = _mm256_and_ps(next4, negBoundAdjust);
315		32764	boundAdjust = _mm256_or_ps(next4, boundAdjust);
316			// Make sure we're in the bounding interval:
317		32764	next3old1 = _mm256_add_ps(next3old1, boundAdjust);
318			_mm256_storeu_ps(outPtr, next3old1); // Store the results back into the output
319		32764	outPtr += 8;
320			}
321
322	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
323		14	number++) {
324		14	*outPtr = *(inPtr) - *(inPtr - 1);
325	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 10 times.	14	if (*outPtr > bound)
326		4	*outPtr -= 2 * bound;
327	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 13 times.	14	if (*outPtr < -bound)
328		1	*outPtr += 2 * bound;
329		14	inPtr++;
330		14	outPtr++;
331			}
332
333		2	*saveValue = inputVector[num_points - 1];
334			}
335			#endif /* LV_HAVE_AVX */
336
337
338			#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H */
339