GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	115	115	100.0%
Functions:	4	4	100.0%
Branches:	14	14	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
    
      #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_complex.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      /*!
    
        \brief Multiplys the one complex vector with the complex conjugate of the second complex
    
        vector and stores their results in the third vector \param cVector The complex vector
    
        where the results will be stored \param aVector One of the complex vectors to be
    
        multiplied \param bVector The complex vector which will be converted to complex
    
        conjugate and multiplied \param num_points The number of complex values in aVector and
    
        bVector to be multiplied together and stored into cVector
    
      */
    
      2
      static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(lv_16sc_t* cVector,
    
                                                                    const lv_8sc_t* aVector,
    
                                                                    const lv_8sc_t* bVector,
    
                                                                    unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 8;
    
          __m256i x, y, realz, imagz;
    
      2
          lv_16sc_t* c = cVector;
    
      2
          const lv_8sc_t* a = aVector;
    
      2
          const lv_8sc_t* b = bVector;
    
          __m256i conjugateSign =
    
      2
              _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < quarterPoints; number++) {
    
              // Convert 8 bit values into 16 bit values
    
      65532
              x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
    
      65532
              y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
    
              // Calculate the ar*cr - ai*(-ci) portions
    
      32766
              realz = _mm256_madd_epi16(x, y);
    
              // Calculate the complex conjugate of the cr + ci j values
    
      32766
              y = _mm256_sign_epi16(y, conjugateSign);
    
              // Shift the order of the cr and ci values
    
      32766
              y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
    
                                         _MM_SHUFFLE(2, 3, 0, 1));
    
              // Calculate the ar*(-ci) + cr*(ai)
    
      32766
              imagz = _mm256_madd_epi16(x, y);
    
              // Perform the addition of products
    
      98298
              _mm256_store_si256((__m256i*)c,
    
                                 _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
    
                                                    _mm256_unpackhi_epi32(realz, imagz)));
    
      32766
              a += 8;
    
      32766
              b += 8;
    
      32766
              c += 8;
    
          }
    
      2
          number = quarterPoints * 8;
    
      2
          int16_t* c16Ptr = (int16_t*)&cVector[number];
    
      2
          int8_t* a8Ptr = (int8_t*)&aVector[number];
    
      2
          int8_t* b8Ptr = (int8_t*)&bVector[number];
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              float aReal = (float)*a8Ptr++;
    
      14
              float aImag = (float)*a8Ptr++;
    
      14
              lv_32fc_t aVal = lv_cmake(aReal, aImag);
    
      14
              float bReal = (float)*b8Ptr++;
    
      14
              float bImag = (float)*b8Ptr++;
    
      14
              lv_32fc_t bVal = lv_cmake(bReal, -bImag);
    
      14
              lv_32fc_t temp = aVal * bVal;
    
      14
              *c16Ptr++ = (int16_t)lv_creal(temp);
    
      14
              *c16Ptr++ = (int16_t)lv_cimag(temp);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE4_1
    
      #include <smmintrin.h>
    
      /*!
    
        \brief Multiplys the one complex vector with the complex conjugate of the second complex
    
        vector and stores their results in the third vector \param cVector The complex vector
    
        where the results will be stored \param aVector One of the complex vectors to be
    
        multiplied \param bVector The complex vector which will be converted to complex
    
        conjugate and multiplied \param num_points The number of complex values in aVector and
    
        bVector to be multiplied together and stored into cVector
    
      */
    
      2
      static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector,
    
                                                                      const lv_8sc_t* aVector,
    
                                                                      const lv_8sc_t* bVector,
    
                                                                      unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
          __m128i x, y, realz, imagz;
    
      2
          lv_16sc_t* c = cVector;
    
      2
          const lv_8sc_t* a = aVector;
    
      2
          const lv_8sc_t* b = bVector;
    
      2
          __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
              // Convert into 8 bit values into 16 bit values
    
      131068
              x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
    
      131068
              y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
    
              // Calculate the ar*cr - ai*(-ci) portions
    
      65534
              realz = _mm_madd_epi16(x, y);
    
              // Calculate the complex conjugate of the cr + ci j values
    
      65534
              y = _mm_sign_epi16(y, conjugateSign);
    
              // Shift the order of the cr and ci values
    
      65534
              y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
    
                                      _MM_SHUFFLE(2, 3, 0, 1));
    
              // Calculate the ar*(-ci) + cr*(ai)
    
      65534
              imagz = _mm_madd_epi16(x, y);
    
      196602
              _mm_store_si128((__m128i*)c,
    
                              _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz),
    
                                              _mm_unpackhi_epi32(realz, imagz)));
    
      65534
              a += 4;
    
      65534
              b += 4;
    
      65534
              c += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          int16_t* c16Ptr = (int16_t*)&cVector[number];
    
      2
          int8_t* a8Ptr = (int8_t*)&aVector[number];
    
      2
          int8_t* b8Ptr = (int8_t*)&bVector[number];
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float aReal = (float)*a8Ptr++;
    
      6
              float aImag = (float)*a8Ptr++;
    
      6
              lv_32fc_t aVal = lv_cmake(aReal, aImag);
    
      6
              float bReal = (float)*b8Ptr++;
    
      6
              float bImag = (float)*b8Ptr++;
    
      6
              lv_32fc_t bVal = lv_cmake(bReal, -bImag);
    
      6
              lv_32fc_t temp = aVal * bVal;
    
      6
              *c16Ptr++ = (int16_t)lv_creal(temp);
    
      6
              *c16Ptr++ = (int16_t)lv_cimag(temp);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE4_1 */
    
      #ifdef LV_HAVE_GENERIC
    
      /*!
    
        \brief Multiplys the one complex vector with the complex conjugate of the second complex
    
        vector and stores their results in the third vector \param cVector The complex vector
    
        where the results will be stored \param aVector One of the complex vectors to be
    
        multiplied \param bVector The complex vector which will be converted to complex
    
        conjugate and multiplied \param num_points The number of complex values in aVector and
    
        bVector to be multiplied together and stored into cVector
    
      */
    
      2
      static inline void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t* cVector,
    
                                                                     const lv_8sc_t* aVector,
    
                                                                     const lv_8sc_t* bVector,
    
                                                                     unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          int16_t* c16Ptr = (int16_t*)cVector;
    
      2
          int8_t* a8Ptr = (int8_t*)aVector;
    
      2
          int8_t* b8Ptr = (int8_t*)bVector;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              float aReal = (float)*a8Ptr++;
    
      262142
              float aImag = (float)*a8Ptr++;
    
      262142
              lv_32fc_t aVal = lv_cmake(aReal, aImag);
    
      262142
              float bReal = (float)*b8Ptr++;
    
      262142
              float bImag = (float)*b8Ptr++;
    
      262142
              lv_32fc_t bVal = lv_cmake(bReal, -bImag);
    
      262142
              lv_32fc_t temp = aVal * bVal;
    
      262142
              *c16Ptr++ = (int16_t)lv_creal(temp);
    
      262142
              *c16Ptr++ = (int16_t)lv_cimag(temp);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
    
      #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
    
      #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_complex.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      /*!
    
        \brief Multiplys the one complex vector with the complex conjugate of the second complex
    
        vector and stores their results in the third vector \param cVector The complex vector
    
        where the results will be stored \param aVector One of the complex vectors to be
    
        multiplied \param bVector The complex vector which will be converted to complex
    
        conjugate and multiplied \param num_points The number of complex values in aVector and
    
        bVector to be multiplied together and stored into cVector
    
      */
    
      2
      static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector,
    
                                                                    const lv_8sc_t* aVector,
    
                                                                    const lv_8sc_t* bVector,
    
                                                                    unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int oneEigthPoints = num_points / 8;
    
          __m256i x, y, realz, imagz;
    
      2
          lv_16sc_t* c = cVector;
    
      2
          const lv_8sc_t* a = aVector;
    
      2
          const lv_8sc_t* b = bVector;
    
          __m256i conjugateSign =
    
      2
              _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < oneEigthPoints; number++) {
    
              // Convert 8 bit values into 16 bit values
    
      65532
              x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
    
      65532
              y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
    
              // Calculate the ar*cr - ai*(-ci) portions
    
      32766
              realz = _mm256_madd_epi16(x, y);
    
              // Calculate the complex conjugate of the cr + ci j values
    
      32766
              y = _mm256_sign_epi16(y, conjugateSign);
    
              // Shift the order of the cr and ci values
    
      32766
              y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
    
                                         _MM_SHUFFLE(2, 3, 0, 1));
    
              // Calculate the ar*(-ci) + cr*(ai)
    
      32766
              imagz = _mm256_madd_epi16(x, y);
    
              // Perform the addition of products
    
      98298
              _mm256_storeu_si256((__m256i*)c,
    
                                  _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
    
                                                     _mm256_unpackhi_epi32(realz, imagz)));
    
      32766
              a += 8;
    
      32766
              b += 8;
    
      32766
              c += 8;
    
          }
    
      2
          number = oneEigthPoints * 8;
    
      2
          int16_t* c16Ptr = (int16_t*)&cVector[number];
    
      2
          int8_t* a8Ptr = (int8_t*)&aVector[number];
    
      2
          int8_t* b8Ptr = (int8_t*)&bVector[number];
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              float aReal = (float)*a8Ptr++;
    
      14
              float aImag = (float)*a8Ptr++;
    
      14
              lv_32fc_t aVal = lv_cmake(aReal, aImag);
    
      14
              float bReal = (float)*b8Ptr++;
    
      14
              float bImag = (float)*b8Ptr++;
    
      14
              lv_32fc_t bVal = lv_cmake(bReal, -bImag);
    
      14
              lv_32fc_t temp = aVal * bVal;
    
      14
              *c16Ptr++ = (int16_t)lv_creal(temp);
    
      14
              *c16Ptr++ = (int16_t)lv_cimag(temp);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
11			#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
12
13			#include <inttypes.h>
14			#include <stdio.h>
15			#include <volk/volk_complex.h>
16
17			#ifdef LV_HAVE_AVX2
18			#include <immintrin.h>
19			/*!
20			\brief Multiplys the one complex vector with the complex conjugate of the second complex
21			vector and stores their results in the third vector \param cVector The complex vector
22			where the results will be stored \param aVector One of the complex vectors to be
23			multiplied \param bVector The complex vector which will be converted to complex
24			conjugate and multiplied \param num_points The number of complex values in aVector and
25			bVector to be multiplied together and stored into cVector
26			*/
27		2	static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(lv_16sc_t* cVector,
28			const lv_8sc_t* aVector,
29			const lv_8sc_t* bVector,
30			unsigned int num_points)
31			{
32		2	unsigned int number = 0;
33		2	const unsigned int quarterPoints = num_points / 8;
34
35			__m256i x, y, realz, imagz;
36		2	lv_16sc_t* c = cVector;
37		2	const lv_8sc_t* a = aVector;
38		2	const lv_8sc_t* b = bVector;
39			__m256i conjugateSign =
40		2	_mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
41
42	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < quarterPoints; number++) {
43			// Convert 8 bit values into 16 bit values
44		65532	x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
45		65532	y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
46
47			// Calculate the ar*cr - ai*(-ci) portions
48		32766	realz = _mm256_madd_epi16(x, y);
49
50			// Calculate the complex conjugate of the cr + ci j values
51		32766	y = _mm256_sign_epi16(y, conjugateSign);
52
53			// Shift the order of the cr and ci values
54		32766	y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
55			_MM_SHUFFLE(2, 3, 0, 1));
56
57			// Calculate the ar*(-ci) + cr*(ai)
58		32766	imagz = _mm256_madd_epi16(x, y);
59
60			// Perform the addition of products
61
62		98298	_mm256_store_si256((__m256i*)c,
63			_mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
64			_mm256_unpackhi_epi32(realz, imagz)));
65
66		32766	a += 8;
67		32766	b += 8;
68		32766	c += 8;
69			}
70
71		2	number = quarterPoints * 8;
72		2	int16_t* c16Ptr = (int16_t*)&cVector[number];
73		2	int8_t* a8Ptr = (int8_t*)&aVector[number];
74		2	int8_t* b8Ptr = (int8_t*)&bVector[number];
75	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
76		14	float aReal = (float)*a8Ptr++;
77		14	float aImag = (float)*a8Ptr++;
78		14	lv_32fc_t aVal = lv_cmake(aReal, aImag);
79		14	float bReal = (float)*b8Ptr++;
80		14	float bImag = (float)*b8Ptr++;
81		14	lv_32fc_t bVal = lv_cmake(bReal, -bImag);
82		14	lv_32fc_t temp = aVal * bVal;
83
84		14	*c16Ptr++ = (int16_t)lv_creal(temp);
85		14	*c16Ptr++ = (int16_t)lv_cimag(temp);
86			}
87		2	}
88			#endif /* LV_HAVE_AVX2 */
89
90
91			#ifdef LV_HAVE_SSE4_1
92			#include <smmintrin.h>
93			/*!
94			\brief Multiplys the one complex vector with the complex conjugate of the second complex
95			vector and stores their results in the third vector \param cVector The complex vector
96			where the results will be stored \param aVector One of the complex vectors to be
97			multiplied \param bVector The complex vector which will be converted to complex
98			conjugate and multiplied \param num_points The number of complex values in aVector and
99			bVector to be multiplied together and stored into cVector
100			*/
101		2	static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector,
102			const lv_8sc_t* aVector,
103			const lv_8sc_t* bVector,
104			unsigned int num_points)
105			{
106		2	unsigned int number = 0;
107		2	const unsigned int quarterPoints = num_points / 4;
108
109			__m128i x, y, realz, imagz;
110		2	lv_16sc_t* c = cVector;
111		2	const lv_8sc_t* a = aVector;
112		2	const lv_8sc_t* b = bVector;
113		2	__m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
114
115	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
116			// Convert into 8 bit values into 16 bit values
117		131068	x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
118		131068	y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
119
120			// Calculate the ar*cr - ai*(-ci) portions
121		65534	realz = _mm_madd_epi16(x, y);
122
123			// Calculate the complex conjugate of the cr + ci j values
124		65534	y = _mm_sign_epi16(y, conjugateSign);
125
126			// Shift the order of the cr and ci values
127		65534	y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
128			_MM_SHUFFLE(2, 3, 0, 1));
129
130			// Calculate the ar*(-ci) + cr*(ai)
131		65534	imagz = _mm_madd_epi16(x, y);
132
133		196602	_mm_store_si128((__m128i*)c,
134			_mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz),
135			_mm_unpackhi_epi32(realz, imagz)));
136
137		65534	a += 4;
138		65534	b += 4;
139		65534	c += 4;
140			}
141
142		2	number = quarterPoints * 4;
143		2	int16_t* c16Ptr = (int16_t*)&cVector[number];
144		2	int8_t* a8Ptr = (int8_t*)&aVector[number];
145		2	int8_t* b8Ptr = (int8_t*)&bVector[number];
146	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
147		6	float aReal = (float)*a8Ptr++;
148		6	float aImag = (float)*a8Ptr++;
149		6	lv_32fc_t aVal = lv_cmake(aReal, aImag);
150		6	float bReal = (float)*b8Ptr++;
151		6	float bImag = (float)*b8Ptr++;
152		6	lv_32fc_t bVal = lv_cmake(bReal, -bImag);
153		6	lv_32fc_t temp = aVal * bVal;
154
155		6	*c16Ptr++ = (int16_t)lv_creal(temp);
156		6	*c16Ptr++ = (int16_t)lv_cimag(temp);
157			}
158		2	}
159			#endif /* LV_HAVE_SSE4_1 */
160
161			#ifdef LV_HAVE_GENERIC
162			/*!
163			\brief Multiplys the one complex vector with the complex conjugate of the second complex
164			vector and stores their results in the third vector \param cVector The complex vector
165			where the results will be stored \param aVector One of the complex vectors to be
166			multiplied \param bVector The complex vector which will be converted to complex
167			conjugate and multiplied \param num_points The number of complex values in aVector and
168			bVector to be multiplied together and stored into cVector
169			*/
170		2	static inline void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t* cVector,
171			const lv_8sc_t* aVector,
172			const lv_8sc_t* bVector,
173			unsigned int num_points)
174			{
175		2	unsigned int number = 0;
176		2	int16_t* c16Ptr = (int16_t*)cVector;
177		2	int8_t* a8Ptr = (int8_t*)aVector;
178		2	int8_t* b8Ptr = (int8_t*)bVector;
179	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
180		262142	float aReal = (float)*a8Ptr++;
181		262142	float aImag = (float)*a8Ptr++;
182		262142	lv_32fc_t aVal = lv_cmake(aReal, aImag);
183		262142	float bReal = (float)*b8Ptr++;
184		262142	float bImag = (float)*b8Ptr++;
185		262142	lv_32fc_t bVal = lv_cmake(bReal, -bImag);
186		262142	lv_32fc_t temp = aVal * bVal;
187
188		262142	*c16Ptr++ = (int16_t)lv_creal(temp);
189		262142	*c16Ptr++ = (int16_t)lv_cimag(temp);
190			}
191		2	}
192			#endif /* LV_HAVE_GENERIC */
193
194			#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
195
196			#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
197			#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
198
199			#include <inttypes.h>
200			#include <stdio.h>
201			#include <volk/volk_complex.h>
202
203			#ifdef LV_HAVE_AVX2
204			#include <immintrin.h>
205			/*!
206			\brief Multiplys the one complex vector with the complex conjugate of the second complex
207			vector and stores their results in the third vector \param cVector The complex vector
208			where the results will be stored \param aVector One of the complex vectors to be
209			multiplied \param bVector The complex vector which will be converted to complex
210			conjugate and multiplied \param num_points The number of complex values in aVector and
211			bVector to be multiplied together and stored into cVector
212			*/
213		2	static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector,
214			const lv_8sc_t* aVector,
215			const lv_8sc_t* bVector,
216			unsigned int num_points)
217			{
218		2	unsigned int number = 0;
219		2	const unsigned int oneEigthPoints = num_points / 8;
220
221			__m256i x, y, realz, imagz;
222		2	lv_16sc_t* c = cVector;
223		2	const lv_8sc_t* a = aVector;
224		2	const lv_8sc_t* b = bVector;
225			__m256i conjugateSign =
226		2	_mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
227
228	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < oneEigthPoints; number++) {
229			// Convert 8 bit values into 16 bit values
230		65532	x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
231		65532	y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
232
233			// Calculate the ar*cr - ai*(-ci) portions
234		32766	realz = _mm256_madd_epi16(x, y);
235
236			// Calculate the complex conjugate of the cr + ci j values
237		32766	y = _mm256_sign_epi16(y, conjugateSign);
238
239			// Shift the order of the cr and ci values
240		32766	y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
241			_MM_SHUFFLE(2, 3, 0, 1));
242
243			// Calculate the ar*(-ci) + cr*(ai)
244		32766	imagz = _mm256_madd_epi16(x, y);
245
246			// Perform the addition of products
247
248		98298	_mm256_storeu_si256((__m256i*)c,
249			_mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
250			_mm256_unpackhi_epi32(realz, imagz)));
251
252		32766	a += 8;
253		32766	b += 8;
254		32766	c += 8;
255			}
256
257		2	number = oneEigthPoints * 8;
258		2	int16_t* c16Ptr = (int16_t*)&cVector[number];
259		2	int8_t* a8Ptr = (int8_t*)&aVector[number];
260		2	int8_t* b8Ptr = (int8_t*)&bVector[number];
261	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
262		14	float aReal = (float)*a8Ptr++;
263		14	float aImag = (float)*a8Ptr++;
264		14	lv_32fc_t aVal = lv_cmake(aReal, aImag);
265		14	float bReal = (float)*b8Ptr++;
266		14	float bImag = (float)*b8Ptr++;
267		14	lv_32fc_t bVal = lv_cmake(bReal, -bImag);
268		14	lv_32fc_t temp = aVal * bVal;
269
270		14	*c16Ptr++ = (int16_t)lv_creal(temp);
271		14	*c16Ptr++ = (int16_t)lv_cimag(temp);
272			}
273		2	}
274			#endif /* LV_HAVE_AVX2 */
275
276			#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */
277