GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_16i_convert_8i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 88 88 100.0%
Functions: 6 6 100.0%
Branches: 20 20 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_16i_convert_8i
12 *
13 * \b Overview
14 *
15 * Converts 16-bit shorts to 8-bit chars by shifting each sample right by 8 bits (keeping the upper byte).
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_16i_convert_8i(int8_t* outputVector, const int16_t* inputVector, unsigned int
20 * num_points) \endcode
21 *
22 * \b Inputs
23 * \li inputVector: The input vector of 16-bit shorts.
24 * \li num_points: The number of data points (16-bit samples) to convert.
25 *
26 * \b Outputs
27 * \li outputVector: The output vector of 8-bit chars.
28 *
29 * \b Example
30 * \code
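31 * int N = 10000;
32 * int16_t* x = (int16_t*)volk_malloc(sizeof(int16_t) * N, volk_get_alignment());
33 * int8_t* t = (int8_t*)volk_malloc(sizeof(int8_t) * N, volk_get_alignment());
34 * volk_16i_convert_8i(t, x, N); // x is assumed to have been filled with samples first
35 * volk_free(x);
36 * volk_free(t);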
37 * \endcode
38 */
39
40 #ifndef INCLUDED_volk_16i_convert_8i_u_H
41 #define INCLUDED_volk_16i_convert_8i_u_H
42
43 #include <inttypes.h>
44 #include <stdio.h>
45
46 #ifdef LV_HAVE_AVX2
47 #include <immintrin.h>
48
/*
 * AVX2 kernel using unaligned loads/stores: writes the upper byte of each
 * 16-bit input sample (sample >> 8) to outputVector.
 *
 * outputVector: destination for num_points int8_t results.
 * inputVector:  source of num_points int16_t samples.
 * num_points:   number of samples; full groups of 32 go through the vector
 *               loop, the remainder through the scalar loop at the end.
 */
49 2 static inline void volk_16i_convert_8i_u_avx2(int8_t* outputVector,
50 const int16_t* inputVector,
51 unsigned int num_points)
52 {
53 2 unsigned int number = 0;
54 2 const unsigned int thirtysecondPoints = num_points / 32; // number of full 32-sample groups
55
56 2 int8_t* outputVectorPtr = outputVector;
57 2 int16_t* inputPtr = (int16_t*)inputVector; // cast drops const; inputPtr is only read through below
58 __m256i inputVal1;
59 __m256i inputVal2;
60 __m256i ret;
61
62
2/2
✓ Branch 0 taken 8190 times.
✓ Branch 1 taken 2 times.
8192 for (; number < thirtysecondPoints; number++) {
63
64 // Load 32 values (two 256-bit vectors of 16 int16 samples each)
65 8190 inputVal1 = _mm256_loadu_si256((__m256i*)inputPtr);
66 8190 inputPtr += 16;
67 8190 inputVal2 = _mm256_loadu_si256((__m256i*)inputPtr);
68 8190 inputPtr += 16;
69
70 8190 inputVal1 = _mm256_srai_epi16(inputVal1, 8); // arithmetic shift: sign-extended high byte left in each 16-bit lane
71 8190 inputVal2 = _mm256_srai_epi16(inputVal2, 8);
72
73 8190 ret = _mm256_packs_epi16(inputVal1, inputVal2); // narrow to int8 (signed saturation; values already fit after the shift)
74 8190 ret = _mm256_permute4x64_epi64(ret, 0b11011000); // reorder 64-bit lanes as 0,2,1,3: the pack works per 128-bit half, this restores sample order
75
76 _mm256_storeu_si256((__m256i*)outputVectorPtr, ret);
77
78 8190 outputVectorPtr += 32;
79 }
80
81 2 number = thirtysecondPoints * 32; // index of the first sample not covered by the vector loop
82
2/2
✓ Branch 0 taken 62 times.
✓ Branch 1 taken 2 times.
64 for (; number < num_points; number++) {
83 62 outputVector[number] = (int8_t)(inputVector[number] >> 8); // scalar tail, same conversion
84 }
85 2 }
86 #endif /* LV_HAVE_AVX2 */
87
88
89 #ifdef LV_HAVE_SSE2
90 #include <emmintrin.h>
91
/*
 * SSE2 kernel using unaligned loads/stores: writes the upper byte of each
 * 16-bit input sample (sample >> 8) to outputVector.
 *
 * outputVector: destination for num_points int8_t results.
 * inputVector:  source of num_points int16_t samples.
 * num_points:   number of samples; full groups of 16 go through the vector
 *               loop, the remainder through the scalar loop at the end.
 */
92 2 static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector,
93 const int16_t* inputVector,
94 unsigned int num_points)
95 {
96 2 unsigned int number = 0;
97 2 const unsigned int sixteenthPoints = num_points / 16; // number of full 16-sample groups
98
99 2 int8_t* outputVectorPtr = outputVector;
100 2 int16_t* inputPtr = (int16_t*)inputVector; // cast drops const; inputPtr is only read through below
101 __m128i inputVal1;
102 __m128i inputVal2;
103 __m128i ret;
104
105
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
106
107 // Load the 16 values (two 128-bit vectors of 8 int16 samples each)
108 16382 inputVal1 = _mm_loadu_si128((__m128i*)inputPtr);
109 16382 inputPtr += 8;
110 16382 inputVal2 = _mm_loadu_si128((__m128i*)inputPtr);
111 16382 inputPtr += 8;
112
113 16382 inputVal1 = _mm_srai_epi16(inputVal1, 8); // arithmetic shift: sign-extended high byte left in each 16-bit lane
114 16382 inputVal2 = _mm_srai_epi16(inputVal2, 8);
115
116 16382 ret = _mm_packs_epi16(inputVal1, inputVal2); // narrow to int8 (signed saturation; values already fit after the shift)
117
118 _mm_storeu_si128((__m128i*)outputVectorPtr, ret);
119
120 16382 outputVectorPtr += 16;
121 }
122
123 2 number = sixteenthPoints * 16; // index of the first sample not covered by the vector loop
124
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
125 30 outputVector[number] = (int8_t)(inputVector[number] >> 8); // scalar tail, same conversion
126 }
127 2 }
128 #endif /* LV_HAVE_SSE2 */
129
130
131 #ifdef LV_HAVE_GENERIC
132
/*
 * Portable scalar kernel: for each of the num_points samples, shifts the
 * 16-bit input right by 8 and stores the result as an int8_t.
 *
 * outputVector: destination for num_points int8_t results.
 * inputVector:  source of num_points int16_t samples.
 * num_points:   number of samples to convert.
 */
133 2 static inline void volk_16i_convert_8i_generic(int8_t* outputVector,
134 const int16_t* inputVector,
135 unsigned int num_points)
136 {
137 2 int8_t* outputVectorPtr = outputVector;
138 2 const int16_t* inputVectorPtr = inputVector;
139 2 unsigned int number = 0;
140
141
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
142 262142 *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8)); // keep the upper byte; both pointers advance by one sample
143 }
144 2 }
145 #endif /* LV_HAVE_GENERIC */
146
147
148 #endif /* INCLUDED_volk_16i_convert_8i_u_H */
149 #ifndef INCLUDED_volk_16i_convert_8i_a_H
150 #define INCLUDED_volk_16i_convert_8i_a_H
151
152 #include <inttypes.h>
153 #include <stdio.h>
154
155 #ifdef LV_HAVE_AVX2
156 #include <immintrin.h>
157
/*
 * AVX2 kernel using the aligned load/store intrinsics: writes the upper byte
 * of each 16-bit input sample (sample >> 8) to outputVector.
 *
 * outputVector: destination for num_points int8_t results; accessed with
 *               _mm256_store_si256, which expects a 32-byte aligned address.
 * inputVector:  source of num_points int16_t samples; accessed with
 *               _mm256_load_si256, which expects a 32-byte aligned address.
 * num_points:   number of samples; full groups of 32 go through the vector
 *               loop, the remainder through the scalar loop at the end.
 */
158 2 static inline void volk_16i_convert_8i_a_avx2(int8_t* outputVector,
159 const int16_t* inputVector,
160 unsigned int num_points)
161 {
162 2 unsigned int number = 0;
163 2 const unsigned int thirtysecondPoints = num_points / 32; // number of full 32-sample groups
164
165 2 int8_t* outputVectorPtr = outputVector;
166 2 int16_t* inputPtr = (int16_t*)inputVector; // cast drops const; inputPtr is only read through below
167 __m256i inputVal1;
168 __m256i inputVal2;
169 __m256i ret;
170
171
2/2
✓ Branch 0 taken 8190 times.
✓ Branch 1 taken 2 times.
8192 for (; number < thirtysecondPoints; number++) {
172
173 // Load 32 values (two 256-bit vectors of 16 int16 samples each)
174 8190 inputVal1 = _mm256_load_si256((__m256i*)inputPtr);
175 8190 inputPtr += 16;
176 8190 inputVal2 = _mm256_load_si256((__m256i*)inputPtr);
177 8190 inputPtr += 16;
178
179 8190 inputVal1 = _mm256_srai_epi16(inputVal1, 8); // arithmetic shift: sign-extended high byte left in each 16-bit lane
180 8190 inputVal2 = _mm256_srai_epi16(inputVal2, 8);
181
182 8190 ret = _mm256_packs_epi16(inputVal1, inputVal2); // narrow to int8 (signed saturation; values already fit after the shift)
183 8190 ret = _mm256_permute4x64_epi64(ret, 0b11011000); // reorder 64-bit lanes as 0,2,1,3: the pack works per 128-bit half, this restores sample order
184
185 _mm256_store_si256((__m256i*)outputVectorPtr, ret);
186
187 8190 outputVectorPtr += 32;
188 }
189
190 2 number = thirtysecondPoints * 32; // index of the first sample not covered by the vector loop
191
2/2
✓ Branch 0 taken 62 times.
✓ Branch 1 taken 2 times.
64 for (; number < num_points; number++) {
192 62 outputVector[number] = (int8_t)(inputVector[number] >> 8); // scalar tail, same conversion
193 }
194 2 }
195 #endif /* LV_HAVE_AVX2 */
196
197
198 #ifdef LV_HAVE_SSE2
199 #include <emmintrin.h>
200
/*
 * SSE2 kernel using the aligned load/store intrinsics: writes the upper byte
 * of each 16-bit input sample (sample >> 8) to outputVector.
 *
 * outputVector: destination for num_points int8_t results; accessed with
 *               _mm_store_si128, which expects a 16-byte aligned address.
 * inputVector:  source of num_points int16_t samples; accessed with
 *               _mm_load_si128, which expects a 16-byte aligned address.
 * num_points:   number of samples; full groups of 16 go through the vector
 *               loop, the remainder through the scalar loop at the end.
 */
201 2 static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector,
202 const int16_t* inputVector,
203 unsigned int num_points)
204 {
205 2 unsigned int number = 0;
206 2 const unsigned int sixteenthPoints = num_points / 16; // number of full 16-sample groups
207
208 2 int8_t* outputVectorPtr = outputVector;
209 2 int16_t* inputPtr = (int16_t*)inputVector; // cast drops const; inputPtr is only read through below
210 __m128i inputVal1;
211 __m128i inputVal2;
212 __m128i ret;
213
214
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
215
216 // Load the 16 values (two 128-bit vectors of 8 int16 samples each)
217 16382 inputVal1 = _mm_load_si128((__m128i*)inputPtr);
218 16382 inputPtr += 8;
219 16382 inputVal2 = _mm_load_si128((__m128i*)inputPtr);
220 16382 inputPtr += 8;
221
222 16382 inputVal1 = _mm_srai_epi16(inputVal1, 8); // arithmetic shift: sign-extended high byte left in each 16-bit lane
223 16382 inputVal2 = _mm_srai_epi16(inputVal2, 8);
224
225 16382 ret = _mm_packs_epi16(inputVal1, inputVal2); // narrow to int8 (signed saturation; values already fit after the shift)
226
227 _mm_store_si128((__m128i*)outputVectorPtr, ret);
228
229 16382 outputVectorPtr += 16;
230 }
231
232 2 number = sixteenthPoints * 16; // index of the first sample not covered by the vector loop
233
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
234 30 outputVector[number] = (int8_t)(inputVector[number] >> 8); // scalar tail, same conversion
235 }
236 2 }
237 #endif /* LV_HAVE_SSE2 */
238
239
240 #ifdef LV_HAVE_NEON
241 #include <arm_neon.h>
242
/*
 * NEON kernel: writes the upper byte of each 16-bit input sample
 * (sample >> 8) to outputVector.
 *
 * outputVector: destination for num_points int8_t results.
 * inputVector:  source of num_points int16_t samples.
 * num_points:   number of samples; full groups of 16 go through the vector
 *               loop, the remainder through the scalar loop at the end.
 */
243 static inline void volk_16i_convert_8i_neon(int8_t* outputVector,
244 const int16_t* inputVector,
245 unsigned int num_points)
246 {
247 int8_t* outputVectorPtr = outputVector;
248 const int16_t* inputVectorPtr = inputVector;
249 unsigned int number = 0;
250 unsigned int sixteenth_points = num_points / 16; // number of full 16-sample groups
251
252 int16x8_t inputVal0;
253 int16x8_t inputVal1;
254 int8x8_t outputVal0;
255 int8x8_t outputVal1;
256 int8x16_t outputVal;
257
258 for (number = 0; number < sixteenth_points; number++) {
259 // load two input vectors (8 int16 samples each)
260 inputVal0 = vld1q_s16(inputVectorPtr);
261 inputVal1 = vld1q_s16(inputVectorPtr + 8);
262 // shift right by 8 and narrow each lane to 8 bits
263 outputVal0 = vshrn_n_s16(inputVal0, 8);
264 outputVal1 = vshrn_n_s16(inputVal1, 8);
265 // combine the two 8-lane halves into one 16-lane vector and write output
266 outputVal = vcombine_s8(outputVal0, outputVal1);
267 vst1q_s8(outputVectorPtr, outputVal);
268 inputVectorPtr += 16;
269 outputVectorPtr += 16;
270 }
271
272 for (number = sixteenth_points * 16; number < num_points; number++) { // scalar tail; continues from the running pointers
273 *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
274 }
275 }
276 #endif /* LV_HAVE_NEON */
277
278
279 #ifdef LV_HAVE_GENERIC
280
/*
 * Portable scalar kernel (the "_a" named variant): for each of the
 * num_points samples, shifts the 16-bit input right by 8 and stores the
 * result as an int8_t. The body uses only scalar accesses and is the same
 * loop as volk_16i_convert_8i_generic.
 *
 * outputVector: destination for num_points int8_t results.
 * inputVector:  source of num_points int16_t samples.
 * num_points:   number of samples to convert.
 */
281 2 static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector,
282 const int16_t* inputVector,
283 unsigned int num_points)
284 {
285 2 int8_t* outputVectorPtr = outputVector;
286 2 const int16_t* inputVectorPtr = inputVector;
287 2 unsigned int number = 0;
288
289
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
290 262142 *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8)); // keep the upper byte; both pointers advance by one sample
291 }
292 2 }
293 #endif /* LV_HAVE_GENERIC */
294
295 #endif /* INCLUDED_volk_16i_convert_8i_a_H */
296