GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_8u_conv_k7_r2puppet_8u.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 95 101 94.1%
Functions: 4 4 100.0%
Branches: 38 44 86.4%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 #ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
11 #define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
12
13 #include <string.h>
14 #include <volk/volk.h>
15 #include <volk/volk_8u_x4_conv_k7_r2_8u.h>
16
17 typedef union {
18 // Two pointer views of one location in a decision buffer (decision_t is a BIT vector).
19 unsigned char* t; // byte-granular view
20 unsigned int* w; // unsigned-int (word) view
21 } p_decision_t;
22
/*
 * Return the parity (0 for an even, 1 for an odd number of set bits) of x,
 * using a 256-entry lookup table.
 *
 * x      - value whose parity is wanted.
 * Partab - table where Partab[b] holds the parity of the byte value b.
 *
 * The upper bytes of x are XOR-folded into the low byte. Only that low byte
 * is used as the table index, so the lookup can never run past the 256
 * entries of Partab even when x has bits set above bit 7.
 */
static inline int parity(int x, unsigned char* Partab)
{
    x ^= (x >> 16);
    x ^= (x >> 8);
    return Partab[x & 0xff];
}
29
/*
 * Trace back through a Viterbi decision history and write the decoded bits.
 *
 * data      - output buffer; one decoded bit (0 or 1) is stored per byte at
 *             index (step + tailsize - 6) % nbits.
 * nbits     - number of traceback steps to perform.
 * endstate  - state at which the traceback starts (reduced modulo 64).
 * tailsize  - number of 8-byte decision entries skipped at the start of
 *             `decisions` before indexing by step.
 * decisions - decision history: 8 bytes (64 one-bit decisions) per step,
 *             read as host-order unsigned int words.
 *
 * Returns the state reached after the first K-1 (= 6) traceback steps.  When
 * nbits is smaller than 6 that first phase does not run and the normalised
 * start state is returned instead (previously an uninitialised value).
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const int d_ADDSHIFT = 0;
    const int d_numstates = (1 << 6);
    const int d_decision_t_size = d_numstates / 8;
    const unsigned int d_k = 7;
    const int d_framebits = nbits;
    const int dif = tailsize - (d_k - 1);

    endstate = (endstate % d_numstates) << d_ADDSHIFT;

    /* Look past tail */
    const unsigned char* d = decisions + tailsize * d_decision_t_size;

    /* Defined result even when the first loop below never executes. */
    int retval = endstate;

    /* First phase: the K-1 most recent steps; remember the state reached. */
    while (nbits-- > d_framebits - (d_k - 1)) {
        const unsigned char* step = &d[nbits * d_decision_t_size];
        unsigned int word;
        int k;

        /* memcpy instead of a cast: no alignment or aliasing assumptions. */
        memcpy(&word, step + ((endstate >> d_ADDSHIFT) / 32) * sizeof(word), sizeof(word));
        k = (word >> ((endstate >> d_ADDSHIFT) % 32)) & 1;

        endstate = (endstate >> 1) | (k << (d_k - 2 + d_ADDSHIFT));
        data[((nbits + dif) % d_framebits)] = k;

        retval = endstate;
    }
    /* Undo the decrement performed by the failed loop test above. */
    nbits += 1;

    /* Second phase: all remaining steps down to step 0. */
    while (nbits-- != 0) {
        const unsigned char* step = &d[nbits * d_decision_t_size];
        unsigned int word;
        int k;

        memcpy(&word, step + ((endstate >> d_ADDSHIFT) / 32) * sizeof(word), sizeof(word));
        k = (word >> ((endstate >> d_ADDSHIFT) % 32)) & 1;

        endstate = (endstate >> 1) | (k << (d_k - 2 + d_ADDSHIFT));
        data[((nbits + dif) % d_framebits)] = k;
    }

    return retval >> d_ADDSHIFT;
}
90
91
92 #if LV_HAVE_SSE3
93
94 #include <emmintrin.h>
95 #include <mmintrin.h>
96 #include <pmmintrin.h>
97 #include <stdio.h>
98 #include <xmmintrin.h>
99
/*
 * Run volk_8u_x4_conv_k7_r2_8u_spiral over `syms` and trace back the result.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives framebits / 2 - 6 decoded bits, one per byte.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers, branch table and parity table are function-local
 * statics built on the first call and reused afterwards.  The decision
 * buffer D is also static, but it is regrown whenever a call needs more room
 * than is currently allocated (it used to keep the size of the first call,
 * so a later, larger frame overran it).  If an allocation fails the function
 * returns without touching `dec`.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms,
                                                      unsigned char* dec,
                                                      unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* D;
    static size_t D_capacity = 0; /* bytes currently allocated for D */
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_polys[2] = { 79, 109 };

    if (once) {
        int state, i;
        int cnt, ti;

        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so the next call retries from scratch. */
            if (X != NULL) {
                volk_free(X);
            }
            if (Branchtab != NULL) {
                volk_free(Branchtab);
            }
            X = NULL;
            Branchtab = NULL;
            return;
        }
        /* Y is the second half of the X allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Make sure the decision buffer is large enough for this frame. */
    const size_t d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (d_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = d_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric in X. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
176
177 #endif /*LV_HAVE_SSE3*/
178
179
180 #if LV_HAVE_NEON
181
182 #include "volk/sse2neon.h"
183
/*
 * Run volk_8u_x4_conv_k7_r2_8u_neonspiral over `syms` and trace back the
 * result.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives framebits / 2 - 6 decoded bits, one per byte.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers, branch table and parity table are function-local
 * statics built on the first call and reused afterwards.  The decision
 * buffer D is also static, but it is regrown whenever a call needs more room
 * than is currently allocated (it used to keep the size of the first call,
 * so a later, larger frame overran it).  If an allocation fails the function
 * returns without touching `dec`.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* syms,
                                                          unsigned char* dec,
                                                          unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* D;
    static size_t D_capacity = 0; /* bytes currently allocated for D */
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_polys[2] = { 79, 109 };

    if (once) {
        int state, i;
        int cnt, ti;

        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so the next call retries from scratch. */
            if (X != NULL) {
                volk_free(X);
            }
            if (Branchtab != NULL) {
                volk_free(Branchtab);
            }
            X = NULL;
            Branchtab = NULL;
            return;
        }
        /* Y is the second half of the X allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Make sure the decision buffer is large enough for this frame. */
    const size_t d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (d_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = d_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_neonspiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric in X. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
260
261 #endif /*LV_HAVE_NEON*/
262
263
264 //#if LV_HAVE_AVX2
265 //
266 //#include <immintrin.h>
267 //#include <stdio.h>
268 //
269 // static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms,
270 // unsigned char* dec,
271 // unsigned int framebits)
272 //{
273 // if (framebits < 12) {
274 // return;
275 // }
276 //
277 // static int once = 1;
278 // int d_numstates = (1 << 6);
279 // int rate = 2;
280 // static unsigned char* D;
281 // static unsigned char* Y;
282 // static unsigned char* X;
283 // static unsigned int excess = 6;
284 // static unsigned char* Branchtab;
285 // static unsigned char Partab[256];
286 //
287 // int d_polys[2] = { 79, 109 };
288 //
289 //
290 // if (once) {
291 //
292 // X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
293 // Y = X + d_numstates;
294 // Branchtab =
295 // (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
296 // D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6),
297 // volk_get_alignment());
298 // int state, i;
299 // int cnt, ti;
300 //
301 // /* Initialize parity lookup table */
302 // for (i = 0; i < 256; i++) {
303 // cnt = 0;
304 // ti = i;
305 // while (ti) {
306 // if (ti & 1)
307 // cnt++;
308 // ti >>= 1;
309 // }
310 // Partab[i] = cnt & 1;
311 // }
312 // /* Initialize the branch table */
313 // for (state = 0; state < d_numstates / 2; state++) {
314 // for (i = 0; i < rate; i++) {
315 // Branchtab[i * d_numstates / 2 + state] =
316 // parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
317 // }
318 // }
319 //
320 // once = 0;
321 // }
322 //
323 // // unbias the old_metrics
324 // memset(X, 31, d_numstates);
325 //
326 // // initialize decisions
327 // memset(D, 0, (d_numstates / 8) * (framebits + 6));
328 //
329 // volk_8u_x4_conv_k7_r2_8u_avx2(
330 // Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);
331 //
332 // unsigned int min = X[0];
333 // int i = 0, state = 0;
334 // for (i = 0; i < (d_numstates); ++i) {
335 // if (X[i] < min) {
336 // min = X[i];
337 // state = i;
338 // }
339 // }
340 //
341 // chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
342 //
343 // return;
344 //}
345 //
346 //#endif /*LV_HAVE_AVX2*/
347
348
349 #if LV_HAVE_GENERIC
350
351
/*
 * Run volk_8u_x4_conv_k7_r2_8u_generic over `syms` and trace back the result.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives framebits / 2 - 6 decoded bits, one per byte.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers, branch table and parity table are function-local
 * statics built on the first call and reused afterwards.  The decision
 * buffer D is also static, but it is regrown whenever a call needs more room
 * than is currently allocated (it used to keep the size of the first call,
 * so a later, larger frame overran it).  If an allocation fails the function
 * returns without touching `dec`.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms,
                                                       unsigned char* dec,
                                                       unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned char* D;
    static size_t D_capacity = 0; /* bytes currently allocated for D */
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_polys[2] = { 79, 109 };

    if (once) {
        int state, i;
        int cnt, ti;

        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so the next call retries from scratch. */
            if (X != NULL) {
                volk_free(X);
            }
            if (Branchtab != NULL) {
                volk_free(Branchtab);
            }
            X = NULL;
            Branchtab = NULL;
            return;
        }
        /* Y is the second half of the X allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Make sure the decision buffer is large enough for this frame. */
    const size_t d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (d_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = d_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric in X. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
429
430 #endif /* LV_HAVE_GENERIC */
431
432 #endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
433