GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_8i_convert_16i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 85 85 100.0%
Functions: 7 7 100.0%
Branches: 20 20 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_8i_convert_16i
12 *
13 * \b Overview
14 *
15 * Convert the input vector of 8-bit chars to a vector of 16-bit
16 * shorts, multiplying each value by 256 (a left shift of 8 bits).
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_8i_convert_16i(int16_t* outputVector, const int8_t* inputVector, unsigned int
21 * num_points) \endcode
22 *
23 * \b Inputs
24 * \li inputVector: The input vector of 8-bit chars.
25 * \li num_points: The number of values.
26 *
27 * \b Outputs
28 * \li outputVector: The output 16-bit shorts.
29 *
30 * \b Example
31 * \code
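32 * int N = 10000;
33 * // in / out: volk_malloc'd buffers holding N int8_t / N int16_t values
34 * volk_8i_convert_16i(out, in, N);
35 * // now out[i] == in[i] * 256 for every i < N
36 * volk_free(in); volk_free(out);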
37 * \endcode
38 */
39
40 #ifndef INCLUDED_volk_8i_convert_16i_u_H
41 #define INCLUDED_volk_8i_convert_16i_u_H
42
43 #include <inttypes.h>
44 #include <stdio.h>
45
46 #ifdef LV_HAVE_AVX2
47 #include <immintrin.h>
48
/*
 * AVX2 kernel, unaligned variant.
 *
 * Sign-extends each int8_t of inputVector to int16_t and shifts it left by
 * 8 bits (i.e. multiplies by 256), writing num_points results to
 * outputVector. The vector loop handles 16 points per iteration using
 * unaligned loads/stores; the remaining num_points % 16 points are handled
 * by the scalar loop at the end.
 */
49 2 static inline void volk_8i_convert_16i_u_avx2(int16_t* outputVector,
50 const int8_t* inputVector,
51 unsigned int num_points)
52 {
53 2 unsigned int number = 0;
54 2 const unsigned int sixteenthPoints = num_points / 16; // full 16-point iterations
55
56 2 const __m128i* inputVectorPtr = (const __m128i*)inputVector;
57 2 __m256i* outputVectorPtr = (__m256i*)outputVector;
58 __m128i inputVal;
59 __m256i ret;
60
61
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
62 16382 inputVal = _mm_loadu_si128(inputVectorPtr); // 16 x int8, unaligned load
63 16382 ret = _mm256_cvtepi8_epi16(inputVal); // sign-extend to 16 x int16
64 16382 ret = _mm256_slli_epi16(ret, 8); // Shift left by 8 bits == multiply by 256
65 _mm256_storeu_si256(outputVectorPtr, ret);
66
67 16382 outputVectorPtr++;
68 16382 inputVectorPtr++;
69 }
70
71 2 number = sixteenthPoints * 16; // first index not covered by the vector loop
72
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
73 30 outputVector[number] = (int16_t)(inputVector[number]) * 256; // scalar tail
74 }
75 2 }
76 #endif /* LV_HAVE_AVX2 */
77
78
79 #ifdef LV_HAVE_SSE4_1
80 #include <smmintrin.h>
81
/*
 * SSE4.1 kernel, unaligned variant.
 *
 * Sign-extends each int8_t of inputVector to int16_t and shifts it left by
 * 8 bits (i.e. multiplies by 256), writing num_points results to
 * outputVector. Each vector iteration loads 16 input bytes and converts
 * them in two halves of 8, producing two 128-bit stores. The remaining
 * num_points % 16 points are handled by the scalar loop at the end.
 */
82 2 static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector,
83 const int8_t* inputVector,
84 unsigned int num_points)
85 {
86 2 unsigned int number = 0;
87 2 const unsigned int sixteenthPoints = num_points / 16; // full 16-point iterations
88
89 2 const __m128i* inputVectorPtr = (const __m128i*)inputVector;
90 2 __m128i* outputVectorPtr = (__m128i*)outputVector;
91 __m128i inputVal;
92 __m128i ret;
93
94
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
95 16382 inputVal = _mm_loadu_si128(inputVectorPtr); // 16 x int8, unaligned load
96 16382 ret = _mm_cvtepi8_epi16(inputVal); // sign-extend the low 8 bytes
97 16382 ret = _mm_slli_epi16(ret, 8); // Shift left by 8 bits == multiply by 256
98 _mm_storeu_si128(outputVectorPtr, ret);
99
100 16382 outputVectorPtr++;
101
102 16382 inputVal = _mm_srli_si128(inputVal, 8); // move the high 8 bytes into the low half
103 16382 ret = _mm_cvtepi8_epi16(inputVal);
104 16382 ret = _mm_slli_epi16(ret, 8); // Shift left by 8 bits == multiply by 256
105 _mm_storeu_si128(outputVectorPtr, ret);
106
107 16382 outputVectorPtr++;
108
109 16382 inputVectorPtr++;
110 }
111
112 2 number = sixteenthPoints * 16; // first index not covered by the vector loop
113
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
114 30 outputVector[number] = (int16_t)(inputVector[number]) * 256; // scalar tail
115 }
116 2 }
117 #endif /* LV_HAVE_SSE4_1 */
118
119
120 #ifdef LV_HAVE_GENERIC
121
/*
 * Generic (scalar) kernel.
 *
 * For each of the num_points inputs, casts the int8_t to int16_t and
 * multiplies it by 256, storing the result in outputVector. The SIMD
 * kernels in this file use this same expression for their tail loops.
 */
122 2 static inline void volk_8i_convert_16i_generic(int16_t* outputVector,
123 const int8_t* inputVector,
124 unsigned int num_points)
125 {
126 2 int16_t* outputVectorPtr = outputVector;
127 2 const int8_t* inputVectorPtr = inputVector;
128 2 unsigned int number = 0;
129
130
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
131 262142 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256; // widen, then scale
132 }
133 2 }
134 #endif /* LV_HAVE_GENERIC */
135
136
137 #endif /* INCLUDED_volk_8i_convert_16i_u_H */
138
139
140 #ifndef INCLUDED_volk_8i_convert_16i_a_H
141 #define INCLUDED_volk_8i_convert_16i_a_H
142
143 #include <inttypes.h>
144 #include <stdio.h>
145
146 #ifdef LV_HAVE_AVX2
147 #include <immintrin.h>
148
/*
 * AVX2 kernel, aligned variant.
 *
 * Sign-extends each int8_t of inputVector to int16_t and shifts it left by
 * 8 bits (i.e. multiplies by 256), writing num_points results to
 * outputVector. The vector loop handles 16 points per iteration using the
 * aligned intrinsics _mm_load_si128 / _mm256_store_si256, which expect
 * inputVector to be 16-byte aligned and outputVector to be 32-byte aligned.
 * The remaining num_points % 16 points are handled by the scalar loop.
 */
149 2 static inline void volk_8i_convert_16i_a_avx2(int16_t* outputVector,
150 const int8_t* inputVector,
151 unsigned int num_points)
152 {
153 2 unsigned int number = 0;
154 2 const unsigned int sixteenthPoints = num_points / 16; // full 16-point iterations
155
156 2 const __m128i* inputVectorPtr = (const __m128i*)inputVector;
157 2 __m256i* outputVectorPtr = (__m256i*)outputVector;
158 __m128i inputVal;
159 __m256i ret;
160
161
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
162 16382 inputVal = _mm_load_si128(inputVectorPtr); // 16 x int8, aligned load
163 16382 ret = _mm256_cvtepi8_epi16(inputVal); // sign-extend to 16 x int16
164 16382 ret = _mm256_slli_epi16(ret, 8); // Shift left by 8 bits == multiply by 256
165 _mm256_store_si256(outputVectorPtr, ret);
166
167 16382 outputVectorPtr++;
168 16382 inputVectorPtr++;
169 }
170
171 2 number = sixteenthPoints * 16; // first index not covered by the vector loop
172
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
173 30 outputVector[number] = (int16_t)(inputVector[number]) * 256; // scalar tail
174 }
175 2 }
176 #endif /* LV_HAVE_AVX2 */
177
178
179 #ifdef LV_HAVE_SSE4_1
180 #include <smmintrin.h>
181
/*
 * SSE4.1 kernel, aligned variant.
 *
 * Sign-extends each int8_t of inputVector to int16_t and shifts it left by
 * 8 bits (i.e. multiplies by 256), writing num_points results to
 * outputVector. Each vector iteration loads 16 input bytes and converts
 * them in two halves of 8, producing two 128-bit stores. It uses the
 * aligned intrinsics _mm_load_si128 / _mm_store_si128, which expect both
 * buffers to be 16-byte aligned. The remaining num_points % 16 points are
 * handled by the scalar loop at the end.
 */
182 2 static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector,
183 const int8_t* inputVector,
184 unsigned int num_points)
185 {
186 2 unsigned int number = 0;
187 2 const unsigned int sixteenthPoints = num_points / 16; // full 16-point iterations
188
189 2 const __m128i* inputVectorPtr = (const __m128i*)inputVector;
190 2 __m128i* outputVectorPtr = (__m128i*)outputVector;
191 __m128i inputVal;
192 __m128i ret;
193
194
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
195 16382 inputVal = _mm_load_si128(inputVectorPtr); // 16 x int8, aligned load
196 16382 ret = _mm_cvtepi8_epi16(inputVal); // sign-extend the low 8 bytes
197 16382 ret = _mm_slli_epi16(ret, 8); // Shift left by 8 bits == multiply by 256
198 _mm_store_si128(outputVectorPtr, ret);
199
200 16382 outputVectorPtr++;
201
202 16382 inputVal = _mm_srli_si128(inputVal, 8); // move the high 8 bytes into the low half
203 16382 ret = _mm_cvtepi8_epi16(inputVal);
204 16382 ret = _mm_slli_epi16(ret, 8); // Shift left by 8 bits == multiply by 256
205 _mm_store_si128(outputVectorPtr, ret);
206
207 16382 outputVectorPtr++;
208
209 16382 inputVectorPtr++;
210 }
211
212 2 number = sixteenthPoints * 16; // first index not covered by the vector loop
213
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
214 30 outputVector[number] = (int16_t)(inputVector[number]) * 256; // scalar tail
215 }
216 2 }
217 #endif /* LV_HAVE_SSE4_1 */
218
219
220 #ifdef LV_HAVE_GENERIC
221
/*
 * Generic (scalar) kernel under the "_a_" name.
 *
 * For each of the num_points inputs, casts the int8_t to int16_t and
 * multiplies it by 256, storing the result in outputVector. Nothing in
 * this body depends on buffer alignment.
 */
222 2 static inline void volk_8i_convert_16i_a_generic(int16_t* outputVector,
223 const int8_t* inputVector,
224 unsigned int num_points)
225 {
226 2 int16_t* outputVectorPtr = outputVector;
227 2 const int8_t* inputVectorPtr = inputVector;
228 2 unsigned int number = 0;
229
230
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
231 262142 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256; // widen, then scale
232 }
233 2 }
234 #endif /* LV_HAVE_GENERIC */
235
236
237 #ifdef LV_HAVE_NEON
238 #include <arm_neon.h>
239
/*
 * ARM NEON kernel.
 *
 * Widens each int8_t of inputVector to int16_t and shifts it left by
 * 8 bits (i.e. multiplies by 256), writing num_points results to
 * outputVector. The vector loop handles 8 points per iteration; the
 * remaining num_points % 8 points are handled by the scalar loop, which
 * continues from the pointers the vector loop has already advanced.
 */
240 static inline void volk_8i_convert_16i_neon(int16_t* outputVector,
241 const int8_t* inputVector,
242 unsigned int num_points)
243 {
244 int16_t* outputVectorPtr = outputVector;
245 const int8_t* inputVectorPtr = inputVector;
246 unsigned int number;
247 const unsigned int eighth_points = num_points / 8; // full 8-point iterations
248
249 int8x8_t input_vec;
250 int16x8_t converted_vec;
251
252 // NEON doesn't have a concept of 8 bit registers, so we are really
253 // dealing with the low half of 16-bit registers. Since this requires
254 // a move instruction we likely do better with ASM here.
255 for (number = 0; number < eighth_points; ++number) {
256 input_vec = vld1_s8(inputVectorPtr); // load 8 x int8
257 converted_vec = vmovl_s8(input_vec); // widen to 8 x int16
258 // converted_vec = vmulq_s16(converted_vec, scale_factor);
259 converted_vec = vshlq_n_s16(converted_vec, 8); // shift left by 8 bits == multiply by 256
260 vst1q_s16(outputVectorPtr, converted_vec);
261
262 inputVectorPtr += 8;
263 outputVectorPtr += 8;
264 }
265
266 for (number = eighth_points * 8; number < num_points; number++) {
267 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256; // scalar tail
268 }
269 }
270 #endif /* LV_HAVE_NEON */
271
272
273 #ifdef LV_HAVE_ORC
/*
 * ORC-backed kernel. The implementation is declared here and defined
 * elsewhere (not visible in this file).
 */
274 extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector,
275 const int8_t* inputVector,
276 unsigned int num_points);
277
/*
 * Thin wrapper that forwards its arguments unchanged to
 * volk_8i_convert_16i_a_orc_impl.
 * NOTE(review): the wrapper is named "_u_" but calls the "_a_" impl;
 * whether that implementation tolerates unaligned buffers cannot be
 * determined from this file -- confirm.
 */
278 2 static inline void volk_8i_convert_16i_u_orc(int16_t* outputVector,
279 const int8_t* inputVector,
280 unsigned int num_points)
281 {
282 2 volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
283 2 }
284 #endif /* LV_HAVE_ORC */
285
286
287 #endif /* INCLUDED_volk_8i_convert_16i_a_H */
288