GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32i_x2_or_32i.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	62	96	64.6%
Functions:	5	7	71.4%
Branches:	14	22	63.6%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32i_x2_or_32i
    
       *
    
       * \b Overview
    
       *
    
       * Computes the Boolean OR operation between two input 32-bit integer vectors.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32i_x2_or_32i(int32_t* cVector, const int32_t* aVector, const int32_t*
    
       * bVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: Input vector of samples.
    
       * \li bVector: Input vector of samples.
    
       * \li num_points: The number of values.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector.
    
       *
    
       * \b Example
    
       * This example generates a Karnaugh map for the first two bits of x OR y
    
       * \code
    
       *   int N = 1<<4;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *
    
       *   int32_t* x = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
    
       *   int32_t* y = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
    
       *   int32_t* z = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
    
       *   int32_t in_seq[] = {0,1,3,2};
    
       *   unsigned int jj=0;
    
       *   for(unsigned int ii=0; ii<N; ++ii){
    
       *       x[ii] = in_seq[ii%4];
    
       *       y[ii] = in_seq[jj];
    
       *       if(((ii+1) % 4) == 0) jj++;
    
       *   }
    
       *
    
       *   volk_32i_x2_or_32i(z, x, y, N);
    
       *
    
       *   printf("Karnaugh map for x OR y\n");
    
       *   printf("y\\x|");
    
       *   for(unsigned int ii=0; ii<4; ++ii){
    
       *       printf(" %.2x ", in_seq[ii]);
    
       *   }
    
       *   printf("\n---|---------------\n");
    
       *   jj = 0;
    
       *   for(unsigned int ii=0; ii<N; ++ii){
    
       *       if(((ii+1) % 4) == 1){
    
       *           printf("%.2x | ", in_seq[jj++]);
    
       *       }
    
       *       printf("%.2x  ", z[ii]);
    
       *       if(!((ii+1) % 4)){
    
       *           printf("\n");
    
       *       }
    
       *   }
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32i_x2_or_32i_a_H
    
      #define INCLUDED_volk_32i_x2_or_32i_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32i_x2_or_32i_a_avx512f(int32_t* cVector,
    
                                                      const int32_t* aVector,
    
                                                      const int32_t* bVector,
    
                                                      unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int sixteenthPoints = num_points / 16;
    
      ✗
          int32_t* cPtr = (int32_t*)cVector;
    
      ✗
          const int32_t* aPtr = (int32_t*)aVector;
    
      ✗
          const int32_t* bPtr = (int32_t*)bVector;
    
          __m512i aVal, bVal, cVal;
    
      ✗
          for (; number < sixteenthPoints; number++) {
    
      ✗
              aVal = _mm512_load_si512(aPtr);
    
      ✗
              bVal = _mm512_load_si512(bPtr);
    
      ✗
              cVal = _mm512_or_si512(aVal, bVal);
    
              _mm512_store_si512(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 16;
    
      ✗
              bPtr += 16;
    
      ✗
              cPtr += 16;
    
          }
    
      ✗
          number = sixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              cVector[number] = aVector[number] | bVector[number];
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32i_x2_or_32i_a_avx2(int32_t* cVector,
    
                                                   const int32_t* aVector,
    
                                                   const int32_t* bVector,
    
                                                   unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int oneEightPoints = num_points / 8;
    
      2
          int32_t* cPtr = cVector;
    
      2
          const int32_t* aPtr = aVector;
    
      2
          const int32_t* bPtr = bVector;
    
          __m256i aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < oneEightPoints; number++) {
    
      32766
              aVal = _mm256_load_si256((__m256i*)aPtr);
    
      32766
              bVal = _mm256_load_si256((__m256i*)bPtr);
    
      32766
              cVal = _mm256_or_si256(aVal, bVal);
    
              _mm256_store_si256((__m256i*)cPtr,
    
                                 cVal); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = oneEightPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              cVector[number] = aVector[number] | bVector[number];
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector,
    
                                                  const int32_t* aVector,
    
                                                  const int32_t* bVector,
    
                                                  unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* cPtr = (float*)cVector;
    
      2
          const float* aPtr = (float*)aVector;
    
      2
          const float* bPtr = (float*)bVector;
    
          __m128 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_load_ps(aPtr);
    
      65534
              bVal = _mm_load_ps(bPtr);
    
      65534
              cVal = _mm_or_ps(aVal, bVal);
    
              _mm_store_ps(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              cVector[number] = aVector[number] | bVector[number];
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32i_x2_or_32i_neon(int32_t* cVector,
    
                                                 const int32_t* aVector,
    
                                                 const int32_t* bVector,
    
                                                 unsigned int num_points)
    
      {
    
          int32_t* cPtr = cVector;
    
          const int32_t* aPtr = aVector;
    
          const int32_t* bPtr = bVector;
    
          unsigned int number = 0;
    
          unsigned int quarter_points = num_points / 4;
    
          int32x4_t a_val, b_val, c_val;
    
          for (number = 0; number < quarter_points; number++) {
    
              a_val = vld1q_s32(aPtr);
    
              b_val = vld1q_s32(bPtr);
    
              c_val = vorrq_s32(a_val, b_val);
    
              vst1q_s32(cPtr, c_val);
    
              aPtr += 4;
    
              bPtr += 4;
    
              cPtr += 4;
    
          }
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              *cPtr++ = (*aPtr++) | (*bPtr++);
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32i_x2_or_32i_generic(int32_t* cVector,
    
                                                    const int32_t* aVector,
    
                                                    const int32_t* bVector,
    
                                                    unsigned int num_points)
    
      {
    
      2
          int32_t* cPtr = cVector;
    
      2
          const int32_t* aPtr = aVector;
    
      2
          const int32_t* bPtr = bVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *cPtr++ = (*aPtr++) | (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_ORC
    
      extern void volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector,
    
                                                const int32_t* aVector,
    
                                                const int32_t* bVector,
    
                                                unsigned int num_points);
    
      2
      static inline void volk_32i_x2_or_32i_u_orc(int32_t* cVector,
    
                                                  const int32_t* aVector,
    
                                                  const int32_t* bVector,
    
                                                  unsigned int num_points)
    
      {
    
      2
          volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, bVector, num_points);
    
      2
      }
    
      #endif /* LV_HAVE_ORC */
    
      #endif /* INCLUDED_volk_32i_x2_or_32i_a_H */
    
      #ifndef INCLUDED_volk_32i_x2_or_32i_u_H
    
      #define INCLUDED_volk_32i_x2_or_32i_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32i_x2_or_32i_u_avx512f(int32_t* cVector,
    
                                                      const int32_t* aVector,
    
                                                      const int32_t* bVector,
    
                                                      unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int sixteenthPoints = num_points / 16;
    
      ✗
          int32_t* cPtr = (int32_t*)cVector;
    
      ✗
          const int32_t* aPtr = (int32_t*)aVector;
    
      ✗
          const int32_t* bPtr = (int32_t*)bVector;
    
          __m512i aVal, bVal, cVal;
    
      ✗
          for (; number < sixteenthPoints; number++) {
    
      ✗
              aVal = _mm512_loadu_si512(aPtr);
    
      ✗
              bVal = _mm512_loadu_si512(bPtr);
    
      ✗
              cVal = _mm512_or_si512(aVal, bVal);
    
              _mm512_storeu_si512(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 16;
    
      ✗
              bPtr += 16;
    
      ✗
              cPtr += 16;
    
          }
    
      ✗
          number = sixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              cVector[number] = aVector[number] | bVector[number];
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32i_x2_or_32i_u_avx2(int32_t* cVector,
    
                                                   const int32_t* aVector,
    
                                                   const int32_t* bVector,
    
                                                   unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int oneEightPoints = num_points / 8;
    
      2
          int32_t* cPtr = cVector;
    
      2
          const int32_t* aPtr = aVector;
    
      2
          const int32_t* bPtr = bVector;
    
          __m256i aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < oneEightPoints; number++) {
    
      32766
              aVal = _mm256_loadu_si256((__m256i*)aPtr);
    
      32766
              bVal = _mm256_loadu_si256((__m256i*)bPtr);
    
      32766
              cVal = _mm256_or_si256(aVal, bVal);
    
              _mm256_storeu_si256((__m256i*)cPtr,
    
                                  cVal); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = oneEightPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              cVector[number] = aVector[number] | bVector[number];
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_32i_x2_or_32i_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32i_x2_or_32i
12			*
13			* \b Overview
14			*
15			* Computes the Boolean OR operation between two input 32-bit integer vectors.
16			*
17			* <b>Dispatcher Prototype</b>
18			* \code
19			* void volk_32i_x2_or_32i(int32_t* cVector, const int32_t* aVector, const int32_t*
20			* bVector, unsigned int num_points) \endcode
21			*
22			* \b Inputs
23			* \li aVector: Input vector of samples.
24			* \li bVector: Input vector of samples.
25			* \li num_points: The number of values.
26			*
27			* \b Outputs
28			* \li cVector: The output vector.
29			*
30			* \b Example
31			* This example generates a Karnaugh map for the first two bits of x OR y
32			* \code
33			* int N = 1<<4;
34			* unsigned int alignment = volk_get_alignment();
35			*
36			* int32_t* x = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
37			* int32_t* y = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
38			* int32_t* z = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
39			* int32_t in_seq[] = {0,1,3,2};
40			* unsigned int jj=0;
41			* for(unsigned int ii=0; ii<N; ++ii){
42			* x[ii] = in_seq[ii%4];
43			* y[ii] = in_seq[jj];
44			* if(((ii+1) % 4) == 0) jj++;
45			* }
46			*
47			* volk_32i_x2_or_32i(z, x, y, N);
48			*
49			* printf("Karnaugh map for x OR y\n");
50			* printf("y\\x|");
51			* for(unsigned int ii=0; ii<4; ++ii){
52			* printf(" %.2x ", in_seq[ii]);
53			* }
54			* printf("\n---|---------------\n");
55			* jj = 0;
56			* for(unsigned int ii=0; ii<N; ++ii){
57			* if(((ii+1) % 4) == 1){
58			* printf("%.2x | ", in_seq[jj++]);
59			* }
60			* printf("%.2x ", z[ii]);
61			* if(!((ii+1) % 4)){
62			* printf("\n");
63			* }
64			* }
65			* \endcode
66			*/
67
68			#ifndef INCLUDED_volk_32i_x2_or_32i_a_H
69			#define INCLUDED_volk_32i_x2_or_32i_a_H
70
71			#include <inttypes.h>
72			#include <stdio.h>
73
74			#ifdef LV_HAVE_AVX512F
75			#include <immintrin.h>
76
77		✗	static inline void volk_32i_x2_or_32i_a_avx512f(int32_t* cVector,
78			const int32_t* aVector,
79			const int32_t* bVector,
80			unsigned int num_points)
81			{
82		✗	unsigned int number = 0;
83		✗	const unsigned int sixteenthPoints = num_points / 16;
84
85		✗	int32_t* cPtr = (int32_t*)cVector;
86		✗	const int32_t* aPtr = (int32_t*)aVector;
87		✗	const int32_t* bPtr = (int32_t*)bVector;
88
89			__m512i aVal, bVal, cVal;
90		✗	for (; number < sixteenthPoints; number++) {
91
92		✗	aVal = _mm512_load_si512(aPtr);
93		✗	bVal = _mm512_load_si512(bPtr);
94
95		✗	cVal = _mm512_or_si512(aVal, bVal);
96
97			_mm512_store_si512(cPtr, cVal); // Store the results back into the C container
98
99		✗	aPtr += 16;
100		✗	bPtr += 16;
101		✗	cPtr += 16;
102			}
103
104		✗	number = sixteenthPoints * 16;
105		✗	for (; number < num_points; number++) {
106		✗	cVector[number] = aVector[number] | bVector[number];
107			}
108		✗	}
109			#endif /* LV_HAVE_AVX512F */
110
111			#ifdef LV_HAVE_AVX2
112			#include <immintrin.h>
113
114		2	static inline void volk_32i_x2_or_32i_a_avx2(int32_t* cVector,
115			const int32_t* aVector,
116			const int32_t* bVector,
117			unsigned int num_points)
118			{
119		2	unsigned int number = 0;
120		2	const unsigned int oneEightPoints = num_points / 8;
121
122		2	int32_t* cPtr = cVector;
123		2	const int32_t* aPtr = aVector;
124		2	const int32_t* bPtr = bVector;
125
126			__m256i aVal, bVal, cVal;
127	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < oneEightPoints; number++) {
128
129		32766	aVal = _mm256_load_si256((__m256i*)aPtr);
130		32766	bVal = _mm256_load_si256((__m256i*)bPtr);
131
132		32766	cVal = _mm256_or_si256(aVal, bVal);
133
134			_mm256_store_si256((__m256i*)cPtr,
135			cVal); // Store the results back into the C container
136
137		32766	aPtr += 8;
138		32766	bPtr += 8;
139		32766	cPtr += 8;
140			}
141
142		2	number = oneEightPoints * 8;
143	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
144		14	cVector[number] = aVector[number] | bVector[number];
145			}
146		2	}
147			#endif /* LV_HAVE_AVX2 */
148
149
150			#ifdef LV_HAVE_SSE
151			#include <xmmintrin.h>
152
153		2	static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector,
154			const int32_t* aVector,
155			const int32_t* bVector,
156			unsigned int num_points)
157			{
158		2	unsigned int number = 0;
159		2	const unsigned int quarterPoints = num_points / 4;
160
161		2	float* cPtr = (float*)cVector;
162		2	const float* aPtr = (float*)aVector;
163		2	const float* bPtr = (float*)bVector;
164
165			__m128 aVal, bVal, cVal;
166	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
167		65534	aVal = _mm_load_ps(aPtr);
168		65534	bVal = _mm_load_ps(bPtr);
169
170		65534	cVal = _mm_or_ps(aVal, bVal);
171
172			_mm_store_ps(cPtr, cVal); // Store the results back into the C container
173
174		65534	aPtr += 4;
175		65534	bPtr += 4;
176		65534	cPtr += 4;
177			}
178
179		2	number = quarterPoints * 4;
180	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
181		6	cVector[number] = aVector[number] | bVector[number];
182			}
183		2	}
184			#endif /* LV_HAVE_SSE */
185
186
187			#ifdef LV_HAVE_NEON
188			#include <arm_neon.h>
189
190			static inline void volk_32i_x2_or_32i_neon(int32_t* cVector,
191			const int32_t* aVector,
192			const int32_t* bVector,
193			unsigned int num_points)
194			{
195			int32_t* cPtr = cVector;
196			const int32_t* aPtr = aVector;
197			const int32_t* bPtr = bVector;
198			unsigned int number = 0;
199			unsigned int quarter_points = num_points / 4;
200
201			int32x4_t a_val, b_val, c_val;
202
203			for (number = 0; number < quarter_points; number++) {
204			a_val = vld1q_s32(aPtr);
205			b_val = vld1q_s32(bPtr);
206			c_val = vorrq_s32(a_val, b_val);
207			vst1q_s32(cPtr, c_val);
208			aPtr += 4;
209			bPtr += 4;
210			cPtr += 4;
211			}
212
213			for (number = quarter_points * 4; number < num_points; number++) {
214			*cPtr++ = (*aPtr++) | (*bPtr++);
215			}
216			}
217			#endif /* LV_HAVE_NEON */
218
219
220			#ifdef LV_HAVE_GENERIC
221
222		2	static inline void volk_32i_x2_or_32i_generic(int32_t* cVector,
223			const int32_t* aVector,
224			const int32_t* bVector,
225			unsigned int num_points)
226			{
227		2	int32_t* cPtr = cVector;
228		2	const int32_t* aPtr = aVector;
229		2	const int32_t* bPtr = bVector;
230		2	unsigned int number = 0;
231
232	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
233		262142	*cPtr++ = (*aPtr++) | (*bPtr++);
234			}
235		2	}
236			#endif /* LV_HAVE_GENERIC */
237
238
239			#ifdef LV_HAVE_ORC
240			extern void volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector,
241			const int32_t* aVector,
242			const int32_t* bVector,
243			unsigned int num_points);
244
245		2	static inline void volk_32i_x2_or_32i_u_orc(int32_t* cVector,
246			const int32_t* aVector,
247			const int32_t* bVector,
248			unsigned int num_points)
249			{
250		2	volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, bVector, num_points);
251		2	}
252			#endif /* LV_HAVE_ORC */
253
254
255			#endif /* INCLUDED_volk_32i_x2_or_32i_a_H */
256
257
258			#ifndef INCLUDED_volk_32i_x2_or_32i_u_H
259			#define INCLUDED_volk_32i_x2_or_32i_u_H
260
261			#include <inttypes.h>
262			#include <stdio.h>
263
264			#ifdef LV_HAVE_AVX512F
265			#include <immintrin.h>
266
267		✗	static inline void volk_32i_x2_or_32i_u_avx512f(int32_t* cVector,
268			const int32_t* aVector,
269			const int32_t* bVector,
270			unsigned int num_points)
271			{
272		✗	unsigned int number = 0;
273		✗	const unsigned int sixteenthPoints = num_points / 16;
274
275		✗	int32_t* cPtr = (int32_t*)cVector;
276		✗	const int32_t* aPtr = (int32_t*)aVector;
277		✗	const int32_t* bPtr = (int32_t*)bVector;
278
279			__m512i aVal, bVal, cVal;
280		✗	for (; number < sixteenthPoints; number++) {
281
282		✗	aVal = _mm512_loadu_si512(aPtr);
283		✗	bVal = _mm512_loadu_si512(bPtr);
284
285		✗	cVal = _mm512_or_si512(aVal, bVal);
286
287			_mm512_storeu_si512(cPtr, cVal); // Store the results back into the C container
288
289		✗	aPtr += 16;
290		✗	bPtr += 16;
291		✗	cPtr += 16;
292			}
293
294		✗	number = sixteenthPoints * 16;
295		✗	for (; number < num_points; number++) {
296		✗	cVector[number] = aVector[number] | bVector[number];
297			}
298		✗	}
299			#endif /* LV_HAVE_AVX512F */
300
301			#ifdef LV_HAVE_AVX2
302			#include <immintrin.h>
303
304		2	static inline void volk_32i_x2_or_32i_u_avx2(int32_t* cVector,
305			const int32_t* aVector,
306			const int32_t* bVector,
307			unsigned int num_points)
308			{
309		2	unsigned int number = 0;
310		2	const unsigned int oneEightPoints = num_points / 8;
311
312		2	int32_t* cPtr = cVector;
313		2	const int32_t* aPtr = aVector;
314		2	const int32_t* bPtr = bVector;
315
316			__m256i aVal, bVal, cVal;
317	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < oneEightPoints; number++) {
318
319		32766	aVal = _mm256_loadu_si256((__m256i*)aPtr);
320		32766	bVal = _mm256_loadu_si256((__m256i*)bPtr);
321
322		32766	cVal = _mm256_or_si256(aVal, bVal);
323
324			_mm256_storeu_si256((__m256i*)cPtr,
325			cVal); // Store the results back into the C container
326
327		32766	aPtr += 8;
328		32766	bPtr += 8;
329		32766	cPtr += 8;
330			}
331
332		2	number = oneEightPoints * 8;
333	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
334		14	cVector[number] = aVector[number] | bVector[number];
335			}
336		2	}
337			#endif /* LV_HAVE_AVX2 */
338
339
340			#endif /* INCLUDED_volk_32i_x2_or_32i_u_H */
341