GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_16u_byteswap.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	68	79	86.1%
Functions:	5	7	71.4%
Branches:	16	18	88.9%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_16u_byteswap
    
       *
    
       * \b Overview
    
       *
    
       * Byteswaps (in-place) a vector of uint16_t's (aligned and unaligned kernels are provided).
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_16u_byteswap(uint16_t* intsToSwap, unsigned int num_points)
    
       * \endcode
    
       *
    
       * \b Inputs
    
       * \li intsToSwap: The vector of data to byte swap.
    
       * \li num_points: The number of data points.
    
       *
    
       * \b Outputs
    
       * \li intsToSwap: returns as an in-place calculation.
    
       *
    
       * \b Example
    
       * \code
    
       * int N = 10000;
    
       *
    
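       * uint16_t* x = (uint16_t*)volk_malloc(sizeof(uint16_t) * N, volk_get_alignment());
    
       * for (int i = 0; i < N; i++) {
    
       *     x[i] = (uint16_t)i;
    
       * }
    
       *
    
       * volk_16u_byteswap(x, N);
    
       * // every x[i] now holds its original value with the two bytes exchanged
    
       *
    
       * volk_free(x);
    
       *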
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_16u_byteswap_u_H
    
      #define INCLUDED_volk_16u_byteswap_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_GENERIC
    
      4
      /*!
       * \brief Portable scalar byteswap: exchanges the high and low byte of every
       *        uint16_t in the buffer, in place.
       *
       * \param intsToSwap buffer that is read and overwritten element by element.
       * \param num_points number of uint16_t elements to process; 0 leaves the
       *                   buffer untouched because the loop body never runs.
       */
      static inline void volk_16u_byteswap_generic(uint16_t* intsToSwap,
    
                                                   unsigned int num_points)
    
      {
    
      4
          uint16_t* inputPtr = intsToSwap;
    
        2/2✓ Branch 0 taken 262156 times.
✓ Branch 1 taken 4 times.

      262160
          for (unsigned int point = 0; point < num_points; point++) {
    
      262156
              uint16_t output = *inputPtr;
    
              // `output` is promoted to int for the shifts, so `output << 8` can carry
              // bits above bit 15; the 0xff / 0xff00 masks keep exactly one byte from
              // each shifted copy before they are OR-ed together.
      262156
              output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
    
      262156
              *inputPtr = output;
    
      262156
              inputPtr++;
    
          }
    
      4
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #if LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      /*!
       * \brief AVX2 byteswap using aligned 256-bit loads and stores.
       *
       * Processes 16 uint16_t values per iteration with one byte shuffle, then
       * finishes the remaining (num_points % 16) values with the scalar formula.
       *
       * \param intsToSwap buffer that is read and overwritten in place. It is
       *                   accessed with _mm256_load_si256 / _mm256_store_si256,
       *                   which require a 32-byte aligned address.
       * \param num_points number of uint16_t elements to process.
       */
      static inline void volk_16u_byteswap_a_avx2(uint16_t* intsToSwap, unsigned int num_points)
    
      {
    
          unsigned int number;
    
      2
          const unsigned int nPerSet = 16;
    
      2
          const uint64_t nSets = num_points / nPerSet;
    
      2
          uint16_t* inputPtr = (uint16_t*)intsToSwap;
    
          // Shuffle control: each adjacent index pair is exchanged (1,0, 3,2, ...),
          // i.e. the two bytes of every 16-bit element trade places.
      2
          const uint8_t shuffleVector[32] = { 1,  0,  3,  2,  5,  4,  7,  6,  9,  8,  11,
    
                                              10, 13, 12, 15, 14, 17, 16, 19, 18, 21, 20,
    
                                              23, 22, 25, 24, 27, 26, 29, 28, 31, 30 };
    
          // The control array is a plain stack array, hence the unaligned load here.
      2
          const __m256i myShuffle = _mm256_loadu_si256((__m256i*)&shuffleVector[0]);
    
        2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.

      16384
          for (number = 0; number < nSets; number++) {
    
              // Load 16 uint16_t values (256 bits); inputPtr is advanced only after
              // the store because the result goes back to the same address.
    
      16382
              const __m256i input = _mm256_load_si256((__m256i*)inputPtr);
    
      16382
              const __m256i output = _mm256_shuffle_epi8(input, myShuffle);
    
              // Store the swapped values over the ones just read
    
              _mm256_store_si256((__m256i*)inputPtr, output);
    
      16382
              inputPtr += nPerSet;
    
          }
    
          // Byteswap the remaining num_points - nPerSet * nSets values one at a time:
    
        2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.

      32
          for (number = nPerSet * nSets; number < num_points; number++) {
    
      30
              uint16_t outputVal = *inputPtr;
    
      30
              outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
    
      30
              *inputPtr = outputVal;
    
      30
              inputPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #if LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      /*!
       * \brief AVX2 byteswap using unaligned 256-bit loads and stores.
       *
       * Same algorithm as the aligned AVX2 kernel, but the data is accessed with
       * _mm256_loadu_si256 / _mm256_storeu_si256. 16 uint16_t values are handled
       * per iteration; the remaining (num_points % 16) values are swapped with the
       * scalar formula.
       *
       * \param intsToSwap buffer that is read and overwritten in place.
       * \param num_points number of uint16_t elements to process.
       */
      static inline void volk_16u_byteswap_u_avx2(uint16_t* intsToSwap, unsigned int num_points)
    
      {
    
          unsigned int number;
    
      2
          const unsigned int nPerSet = 16;
    
      2
          const uint64_t nSets = num_points / nPerSet;
    
      2
          uint16_t* inputPtr = (uint16_t*)intsToSwap;
    
          // Shuffle control: each adjacent index pair is exchanged (1,0, 3,2, ...),
          // i.e. the two bytes of every 16-bit element trade places.
      2
          const uint8_t shuffleVector[32] = { 1,  0,  3,  2,  5,  4,  7,  6,  9,  8,  11,
    
                                              10, 13, 12, 15, 14, 17, 16, 19, 18, 21, 20,
    
                                              23, 22, 25, 24, 27, 26, 29, 28, 31, 30 };
    
      2
          const __m256i myShuffle = _mm256_loadu_si256((__m256i*)&shuffleVector[0]);
    
        2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.

      16384
          for (number = 0; number < nSets; number++) {
    
              // Load 16 uint16_t values (256 bits); inputPtr is advanced only after
              // the store because the result goes back to the same address.
    
      16382
              const __m256i input = _mm256_loadu_si256((__m256i*)inputPtr);
    
      16382
              const __m256i output = _mm256_shuffle_epi8(input, myShuffle);
    
              // Store the swapped values over the ones just read
    
              _mm256_storeu_si256((__m256i*)inputPtr, output);
    
      16382
              inputPtr += nPerSet;
    
          }
    
          // Byteswap the remaining num_points - nPerSet * nSets values one at a time:
    
        2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.

      32
          for (number = nPerSet * nSets; number < num_points; number++) {
    
      30
              uint16_t outputVal = *inputPtr;
    
      30
              outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
    
      30
              *inputPtr = outputVal;
    
      30
              inputPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      /*!
       * \brief SSE2 byteswap using unaligned 128-bit loads and stores.
       *
       * Handles 8 uint16_t values per iteration by OR-ing a left-shifted and a
       * right-shifted copy of the vector, then swaps the remaining
       * (num_points % 8) values with the scalar formula.
       *
       * \param intsToSwap buffer that is read and overwritten in place.
       * \param num_points number of uint16_t elements to process.
       */
      static inline void volk_16u_byteswap_u_sse2(uint16_t* intsToSwap, unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          uint16_t* inputPtr = intsToSwap;
    
          __m128i input, left, right, output;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
              // Load 8 uint16_t values (128 bits); inputPtr is advanced only after
              // the store because the result goes back to the same address.
    
      32766
              input = _mm_loadu_si128((__m128i*)inputPtr);
    
              // Shift every 16-bit element by 8 in both directions: `left` holds the
              // low byte moved up, `right` holds the high byte moved down
    
      32766
              left = _mm_slli_epi16(input, 8);
    
      32766
              right = _mm_srli_epi16(input, 8);
    
              // Or the left and right halves together
    
      32766
              output = _mm_or_si128(left, right);
    
              // Store the results
    
              _mm_storeu_si128((__m128i*)inputPtr, output);
    
      32766
              inputPtr += 8;
    
          }
    
          // Byteswap any remaining points (indices eighthPoints * 8 .. num_points - 1):
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              uint16_t outputVal = *inputPtr;
    
      14
              outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
    
      14
              *inputPtr = outputVal;
    
      14
              inputPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #endif /* INCLUDED_volk_16u_byteswap_u_H */
    
      #ifndef INCLUDED_volk_16u_byteswap_a_H
    
      #define INCLUDED_volk_16u_byteswap_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      /*!
       * \brief SSE2 byteswap using aligned 128-bit loads and stores.
       *
       * Handles 8 uint16_t values per iteration by OR-ing a left-shifted and a
       * right-shifted copy of the vector; the remaining (num_points % 8) values
       * are passed to volk_16u_byteswap_generic.
       *
       * \param intsToSwap buffer that is read and overwritten in place. It is
       *                   accessed with _mm_load_si128 / _mm_store_si128, which
       *                   require a 16-byte aligned address.
       * \param num_points number of uint16_t elements to process.
       */
      static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points)
    
      {
    
      2
          uint16_t* inputPtr = intsToSwap;
    
          __m128i input, left, right, output;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (unsigned int number = 0; number < eighthPoints; number++) {
    
              // Load 8 uint16_t values (128 bits); inputPtr is advanced only after
              // the store because the result goes back to the same address.
    
      32766
              input = _mm_load_si128((__m128i*)inputPtr);
    
              // Shift every 16-bit element by 8 in both directions: `left` holds the
              // low byte moved up, `right` holds the high byte moved down
    
      32766
              left = _mm_slli_epi16(input, 8);
    
      32766
              right = _mm_srli_epi16(input, 8);
    
              // Or the left and right halves together
    
      32766
              output = _mm_or_si128(left, right);
    
              // Store the results
    
              _mm_store_si128((__m128i*)inputPtr, output);
    
      32766
              inputPtr += 8;
    
          }
    
          // Byteswap any remaining points; inputPtr already points at the first
          // element the vector loop did not cover:
    
      2
          volk_16u_byteswap_generic(inputPtr, num_points - eighthPoints * 8);
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      /*!
       * \brief NEON byteswap: exchanges the two bytes of every uint16_t in place.
       *
       * Processes 8 uint16_t values (one 128-bit register) per iteration and hands
       * the remaining (num_points % 8) values to volk_16u_byteswap_generic.
       *
       * \param intsToSwap buffer that is read and overwritten in place.
       * \param num_points number of uint16_t elements to process.
       */
      static inline void volk_16u_byteswap_neon(uint16_t* intsToSwap, unsigned int num_points)
      {
          const unsigned int eighth_points = num_points / 8;
          uint16_t* inputPtr = intsToSwap;

          for (unsigned int number = 0; number < eighth_points; number++) {
              const uint16x8_t input = vld1q_u16(inputPtr);

              // vrev16q_u8 reverses the byte order inside each 16-bit element, which
              // is the byteswap itself. The earlier vsri/vsli pair produced the same
              // bits but used an uninitialized vector as its insert destination.
              const uint16x8_t output =
                  vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(input)));

              // Write back over the values just read, then advance.
              vst1q_u16(inputPtr, output);
              inputPtr += 8;
          }

          // Tail: inputPtr already points at the first element not covered above.
          volk_16u_byteswap_generic(inputPtr, num_points - eighth_points * 8);
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      /*!
       * \brief NEON byteswap built on a de-interleaving load and table lookups.
       *
       * Each iteration reads 16 uint16_t values (32 bytes) with vld4_u8, picks the
       * bytes back out in swapped order with four vtbl4_u8 lookups, and stores the
       * four 8-byte results over the input. The remaining (num_points % 16) values
       * are passed to volk_16u_byteswap_generic.
       *
       * \param intsToSwap buffer that is read and overwritten in place.
       * \param num_points number of uint16_t elements to process.
       */
      static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap,
    
                                                      unsigned int num_points)
    
      {
    
          uint16_t* inputPtr = intsToSwap;
    
          unsigned int number = 0;
    
          unsigned int n16points = num_points / 16;
    
          uint8x8x4_t input_table;
    
          uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
    
          uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
    
          /* These magic numbers are used as byte-indices into the 32-entry table
             formed by the four vectors that vld4_u8 returns. They are pre-computed
             to save time. A simple C program can calculate them; for example the
             literal used for lookup01 is produced by:

            uint8_t chars[8] = {8, 0, 24, 16, 9, 1, 25, 17};
            uint64_t index = 0;
            for(ii=0; ii < 8; ++ii) {
                index += ((uint64_t)(*(chars+ii))) << (ii*8);
            }

             (that is how 1232017111498883080 decodes, least-significant byte
             first). vld4_u8 de-interleaves with stride 4, so table entry
             8*k + i is input byte 4*i + k; the indices above therefore select
             input bytes 1,0,3,2,5,4,7,6. Each following constant adds 2 to every
             index and so covers the next 8 input bytes.
          */
    
          int_lookup01 = vcreate_u8(1232017111498883080);
    
          int_lookup23 = vcreate_u8(1376697457175036426);
    
          int_lookup45 = vcreate_u8(1521377802851189772);
    
          int_lookup67 = vcreate_u8(1666058148527343118);
    
          for (number = 0; number < n16points; ++number) {
    
              // Read all 32 bytes before any store: the stores below overwrite them.
              input_table = vld4_u8((uint8_t*)inputPtr);
    
              swapped_int01 = vtbl4_u8(input_table, int_lookup01);
    
              swapped_int23 = vtbl4_u8(input_table, int_lookup23);
    
              swapped_int45 = vtbl4_u8(input_table, int_lookup45);
    
              swapped_int67 = vtbl4_u8(input_table, int_lookup67);
    
              // Each result is 8 bytes = 4 uint16_t, hence the +4 element steps.
              vst1_u8((uint8_t*)inputPtr, swapped_int01);
    
              vst1_u8((uint8_t*)(inputPtr + 4), swapped_int23);
    
              vst1_u8((uint8_t*)(inputPtr + 8), swapped_int45);
    
              vst1_u8((uint8_t*)(inputPtr + 12), swapped_int67);
    
              inputPtr += 16;
    
          }
    
          // Tail: inputPtr already points at the first element not covered above.
          volk_16u_byteswap_generic(inputPtr, num_points - n16points * 16);
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_GENERIC
    
      ✗
      /*!
       * \brief Scalar byteswap registered under the aligned ("_a_") name.
       *
       * The body is the same element-by-element loop as
       * volk_16u_byteswap_generic; nothing in it depends on the alignment of the
       * buffer. The coverage marks (✗) show it was not executed in this run.
       *
       * \param intsToSwap buffer that is read and overwritten element by element.
       * \param num_points number of uint16_t elements to process.
       */
      static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap,
    
                                                     unsigned int num_points)
    
      {
    
      ✗
          uint16_t* inputPtr = intsToSwap;
    
      ✗
          for (unsigned int point = 0; point < num_points; point++) {
    
      ✗
              uint16_t output = *inputPtr;
    
              // `output` is promoted to int for the shifts; the masks keep exactly
              // one byte from each shifted copy before they are OR-ed together.
      ✗
              output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
    
      ✗
              *inputPtr = output;
    
      ✗
              inputPtr++;
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_ORC
    
      /* Implementation is defined outside this header; only its prototype is
         visible here. */
      extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points);
    
      ✗
      /*!
       * \brief ORC kernel entry point: forwards both arguments unchanged to
       *        volk_16u_byteswap_a_orc_impl.
       *
       * NOTE(review): the wrapper carries the unaligned ("_u_") name while the
       * implementation it calls carries the aligned ("_a_") name — confirm that
       * the implementation tolerates unaligned buffers.
       *
       * \param intsToSwap buffer handed to the ORC implementation.
       * \param num_points element count handed to the ORC implementation.
       */
      static inline void volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int num_points)
    
      {
    
      ✗
          volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
    
      ✗
      }
    
      #endif /* LV_HAVE_ORC */
    
      #endif /* INCLUDED_volk_16u_byteswap_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_16u_byteswap
12			*
13			* \b Overview
14			*
15			* Byteswaps (in-place) a vector of uint16_t's (aligned and unaligned kernels are provided).
16			*
17			* <b>Dispatcher Prototype</b>
18			* \code
19			* void volk_16u_byteswap(uint16_t* intsToSwap, unsigned int num_points)
20			* \endcode
21			*
22			* \b Inputs
23			* \li intsToSwap: The vector of data to byte swap.
24			* \li num_points: The number of data points.
25			*
26			* \b Outputs
27			* \li intsToSwap: returns as an in-place calculation.
28			*
29			* \b Example
30			* \code
31			* int N = 10000;
32			*
33			* <FIXME>
34			*
35			* volk_16u_byteswap(x, N);
36			*
37			* \endcode
38			*/
39
40			#ifndef INCLUDED_volk_16u_byteswap_u_H
41			#define INCLUDED_volk_16u_byteswap_u_H
42
43			#include <inttypes.h>
44			#include <stdio.h>
45
46			#ifdef LV_HAVE_GENERIC
47
48		4	static inline void volk_16u_byteswap_generic(uint16_t* intsToSwap,
49			unsigned int num_points)
50			{
51		4	uint16_t* inputPtr = intsToSwap;
52	2/2 ✓ Branch 0 taken 262156 times. ✓ Branch 1 taken 4 times.	262160	for (unsigned int point = 0; point < num_points; point++) {
53		262156	uint16_t output = *inputPtr;
54		262156	output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
55		262156	*inputPtr = output;
56		262156	inputPtr++;
57			}
58		4	}
59			#endif /* LV_HAVE_GENERIC */
60
61
62			#if LV_HAVE_AVX2
63			#include <immintrin.h>
64		2	static inline void volk_16u_byteswap_a_avx2(uint16_t* intsToSwap, unsigned int num_points)
65			{
66			unsigned int number;
67
68		2	const unsigned int nPerSet = 16;
69		2	const uint64_t nSets = num_points / nPerSet;
70
71		2	uint16_t* inputPtr = (uint16_t*)intsToSwap;
72
73		2	const uint8_t shuffleVector[32] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11,
74			10, 13, 12, 15, 14, 17, 16, 19, 18, 21, 20,
75			23, 22, 25, 24, 27, 26, 29, 28, 31, 30 };
76
77		2	const __m256i myShuffle = _mm256_loadu_si256((__m256i*)&shuffleVector[0]);
78
79	2/2 ✓ Branch 0 taken 16382 times. ✓ Branch 1 taken 2 times.	16384	for (number = 0; number < nSets; number++) {
80			// Load the 32t values, increment inputPtr later since we're doing it in-place.
81		16382	const __m256i input = _mm256_load_si256((__m256i*)inputPtr);
82		16382	const __m256i output = _mm256_shuffle_epi8(input, myShuffle);
83
84			// Store the results
85			_mm256_store_si256((__m256i*)inputPtr, output);
86		16382	inputPtr += nPerSet;
87			}
88
89			// Byteswap any remaining points:
90	2/2 ✓ Branch 0 taken 30 times. ✓ Branch 1 taken 2 times.	32	for (number = nPerSet * nSets; number < num_points; number++) {
91		30	uint16_t outputVal = *inputPtr;
92		30	outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
93		30	*inputPtr = outputVal;
94		30	inputPtr++;
95			}
96		2	}
97			#endif /* LV_HAVE_AVX2 */
98
99
100			#if LV_HAVE_AVX2
101			#include <immintrin.h>
102		2	static inline void volk_16u_byteswap_u_avx2(uint16_t* intsToSwap, unsigned int num_points)
103			{
104			unsigned int number;
105
106		2	const unsigned int nPerSet = 16;
107		2	const uint64_t nSets = num_points / nPerSet;
108
109		2	uint16_t* inputPtr = (uint16_t*)intsToSwap;
110
111		2	const uint8_t shuffleVector[32] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11,
112			10, 13, 12, 15, 14, 17, 16, 19, 18, 21, 20,
113			23, 22, 25, 24, 27, 26, 29, 28, 31, 30 };
114
115		2	const __m256i myShuffle = _mm256_loadu_si256((__m256i*)&shuffleVector[0]);
116
117	2/2 ✓ Branch 0 taken 16382 times. ✓ Branch 1 taken 2 times.	16384	for (number = 0; number < nSets; number++) {
118			// Load the 32t values, increment inputPtr later since we're doing it in-place.
119		16382	const __m256i input = _mm256_loadu_si256((__m256i*)inputPtr);
120		16382	const __m256i output = _mm256_shuffle_epi8(input, myShuffle);
121
122			// Store the results
123			_mm256_storeu_si256((__m256i*)inputPtr, output);
124		16382	inputPtr += nPerSet;
125			}
126
127			// Byteswap any remaining points:
128	2/2 ✓ Branch 0 taken 30 times. ✓ Branch 1 taken 2 times.	32	for (number = nPerSet * nSets; number < num_points; number++) {
129		30	uint16_t outputVal = *inputPtr;
130		30	outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
131		30	*inputPtr = outputVal;
132		30	inputPtr++;
133			}
134		2	}
135			#endif /* LV_HAVE_AVX2 */
136
137
138			#ifdef LV_HAVE_SSE2
139			#include <emmintrin.h>
140
141		2	static inline void volk_16u_byteswap_u_sse2(uint16_t* intsToSwap, unsigned int num_points)
142			{
143		2	unsigned int number = 0;
144		2	uint16_t* inputPtr = intsToSwap;
145			__m128i input, left, right, output;
146
147		2	const unsigned int eighthPoints = num_points / 8;
148	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
149			// Load the 16t values, increment inputPtr later since we're doing it in-place.
150		32766	input = _mm_loadu_si128((__m128i*)inputPtr);
151			// Do the two shifts
152		32766	left = _mm_slli_epi16(input, 8);
153		32766	right = _mm_srli_epi16(input, 8);
154			// Or the left and right halves together
155		32766	output = _mm_or_si128(left, right);
156			// Store the results
157			_mm_storeu_si128((__m128i*)inputPtr, output);
158		32766	inputPtr += 8;
159			}
160
161			// Byteswap any remaining points:
162		2	number = eighthPoints * 8;
163	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
164		14	uint16_t outputVal = *inputPtr;
165		14	outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
166		14	*inputPtr = outputVal;
167		14	inputPtr++;
168			}
169		2	}
170			#endif /* LV_HAVE_SSE2 */
171
172
173			#endif /* INCLUDED_volk_16u_byteswap_u_H */
174			#ifndef INCLUDED_volk_16u_byteswap_a_H
175			#define INCLUDED_volk_16u_byteswap_a_H
176
177			#include <inttypes.h>
178			#include <stdio.h>
179
180			#ifdef LV_HAVE_SSE2
181			#include <emmintrin.h>
182
183		2	static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points)
184			{
185		2	uint16_t* inputPtr = intsToSwap;
186			__m128i input, left, right, output;
187
188		2	const unsigned int eighthPoints = num_points / 8;
189	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (unsigned int number = 0; number < eighthPoints; number++) {
190			// Load the 16t values, increment inputPtr later since we're doing it in-place.
191		32766	input = _mm_load_si128((__m128i*)inputPtr);
192			// Do the two shifts
193		32766	left = _mm_slli_epi16(input, 8);
194		32766	right = _mm_srli_epi16(input, 8);
195			// Or the left and right halves together
196		32766	output = _mm_or_si128(left, right);
197			// Store the results
198			_mm_store_si128((__m128i*)inputPtr, output);
199		32766	inputPtr += 8;
200			}
201
202			// Byteswap any remaining points:
203		2	volk_16u_byteswap_generic(inputPtr, num_points - eighthPoints * 8);
204		2	}
205			#endif /* LV_HAVE_SSE2 */
206
207			#ifdef LV_HAVE_NEON
208			#include <arm_neon.h>
209
210			static inline void volk_16u_byteswap_neon(uint16_t* intsToSwap, unsigned int num_points)
211			{
212			unsigned int number;
213			unsigned int eighth_points = num_points / 8;
214			uint16x8_t input, output;
215			uint16_t* inputPtr = intsToSwap;
216
217			for (number = 0; number < eighth_points; number++) {
218			input = vld1q_u16(inputPtr);
219			output = vsriq_n_u16(output, input, 8);
220			output = vsliq_n_u16(output, input, 8);
221			vst1q_u16(inputPtr, output);
222			inputPtr += 8;
223			}
224
225			volk_16u_byteswap_generic(inputPtr, num_points - eighth_points * 8);
226			}
227			#endif /* LV_HAVE_NEON */
228
229			#ifdef LV_HAVE_NEON
230			#include <arm_neon.h>
231
232			static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap,
233			unsigned int num_points)
234			{
235			uint16_t* inputPtr = intsToSwap;
236			unsigned int number = 0;
237			unsigned int n16points = num_points / 16;
238
239			uint8x8x4_t input_table;
240			uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
241			uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
242
243			/* these magic numbers are used as byte-indices in the LUT.
244			they are pre-computed to save time. A simple C program
245			can calculate them; for example for lookup01:
246			uint8_t chars[8] = {24, 16, 8, 0, 25, 17, 9, 1};
247			for(ii=0; ii < 8; ++ii) {
248			index += ((uint64_t)(*(chars+ii))) << (ii*8);
249			}
250			*/
251			int_lookup01 = vcreate_u8(1232017111498883080);
252			int_lookup23 = vcreate_u8(1376697457175036426);
253			int_lookup45 = vcreate_u8(1521377802851189772);
254			int_lookup67 = vcreate_u8(1666058148527343118);
255
256			for (number = 0; number < n16points; ++number) {
257			input_table = vld4_u8((uint8_t*)inputPtr);
258			swapped_int01 = vtbl4_u8(input_table, int_lookup01);
259			swapped_int23 = vtbl4_u8(input_table, int_lookup23);
260			swapped_int45 = vtbl4_u8(input_table, int_lookup45);
261			swapped_int67 = vtbl4_u8(input_table, int_lookup67);
262			vst1_u8((uint8_t*)inputPtr, swapped_int01);
263			vst1_u8((uint8_t*)(inputPtr + 4), swapped_int23);
264			vst1_u8((uint8_t*)(inputPtr + 8), swapped_int45);
265			vst1_u8((uint8_t*)(inputPtr + 12), swapped_int67);
266
267			inputPtr += 16;
268			}
269
270			volk_16u_byteswap_generic(inputPtr, num_points - n16points * 16);
271			}
272			#endif /* LV_HAVE_NEON */
273
274			#ifdef LV_HAVE_GENERIC
275
276		✗	static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap,
277			unsigned int num_points)
278			{
279		✗	uint16_t* inputPtr = intsToSwap;
280		✗	for (unsigned int point = 0; point < num_points; point++) {
281		✗	uint16_t output = *inputPtr;
282		✗	output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
283		✗	*inputPtr = output;
284		✗	inputPtr++;
285			}
286		✗	}
287			#endif /* LV_HAVE_GENERIC */
288
289			#ifdef LV_HAVE_ORC
290
291			extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points);
292		✗	static inline void volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int num_points)
293			{
294		✗	volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
295		✗	}
296			#endif /* LV_HAVE_ORC */
297
298
299			#endif /* INCLUDED_volk_16u_byteswap_a_H */
300