Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2015 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /* | ||
11 | * This puppet is for VOLK tests only. | ||
12 | * For documentation see 'kernels/volk/volk_32f_8u_polarbutterfly_32f.h' | ||
13 | */ | ||
14 | |||
15 | #ifndef VOLK_KERNELS_VOLK_VOLK_32F_8U_POLARBUTTERFLYPUPPET_32F_H_ | ||
16 | #define VOLK_KERNELS_VOLK_VOLK_32F_8U_POLARBUTTERFLYPUPPET_32F_H_ | ||
17 | |||
18 | #include <volk/volk_32f_8u_polarbutterfly_32f.h> | ||
19 | #include <volk/volk_8u_x3_encodepolar_8u_x2.h> | ||
20 | #include <volk/volk_8u_x3_encodepolarpuppet_8u.h> | ||
21 | |||
22 | |||
23 | 6 | static inline void sanitize_bytes(unsigned char* u, const int elements) | |
24 | { | ||
25 | int i; | ||
26 | 6 | unsigned char* u_ptr = u; | |
27 |
2/2✓ Branch 0 taken 786426 times.
✓ Branch 1 taken 6 times.
|
786432 | for (i = 0; i < elements; i++) { |
28 | 786426 | *u_ptr = (*u_ptr & 0x01); | |
29 | 786426 | u_ptr++; | |
30 | } | ||
31 | 6 | } | |
32 | |||
33 | 12 | static inline void clean_up_intermediate_values(float* llrs, | |
34 | unsigned char* u, | ||
35 | const int frame_size, | ||
36 | const int elements) | ||
37 | { | ||
38 | 12 | memset(u + frame_size, 0, sizeof(unsigned char) * (elements - frame_size)); | |
39 | 12 | memset(llrs + frame_size, 0, sizeof(float) * (elements - frame_size)); | |
40 | 12 | } | |
41 | |||
42 | static inline void | ||
43 | 6 | generate_error_free_input_vector(float* llrs, unsigned char* u, const int frame_size) | |
44 | { | ||
45 | 6 | memset(u, 0, frame_size); | |
46 | 6 | unsigned char* target = u + frame_size; | |
47 | 6 | volk_8u_x2_encodeframepolar_8u_generic(target, u + 2 * frame_size, frame_size); | |
48 | 6 | float* ft = llrs; | |
49 | int i; | ||
50 |
2/2✓ Branch 0 taken 24576 times.
✓ Branch 1 taken 6 times.
|
24582 | for (i = 0; i < frame_size; i++) { |
51 | 24576 | *ft = (-2 * ((float)*target++)) + 1.0f; | |
52 | 24576 | ft++; | |
53 | } | ||
54 | 6 | } | |
55 | |||
56 | static inline void | ||
57 | print_llr_tree(const float* llrs, const int frame_size, const int frame_exp) | ||
58 | { | ||
59 | int s, e; | ||
60 | for (s = 0; s < frame_size; s++) { | ||
61 | for (e = 0; e < frame_exp + 1; e++) { | ||
62 | printf("%+4.2f ", llrs[e * frame_size + s]); | ||
63 | } | ||
64 | printf("\n"); | ||
65 | if ((s + 1) % 8 == 0) { | ||
66 | printf("\n"); | ||
67 | } | ||
68 | } | ||
69 | } | ||
70 | |||
71 | 6 | static inline int maximum_frame_size(const int elements) | |
72 | { | ||
73 | 6 | unsigned int frame_size = next_lower_power_of_two(elements); | |
74 | 6 | unsigned int frame_exp = log2_of_power_of_2(frame_size); | |
75 | 6 | return next_lower_power_of_two(frame_size / frame_exp); | |
76 | } | ||
77 | |||
78 | #ifdef LV_HAVE_GENERIC | ||
79 | 2 | static inline void volk_32f_8u_polarbutterflypuppet_32f_generic(float* llrs, | |
80 | const float* input, | ||
81 | unsigned char* u, | ||
82 | const int elements) | ||
83 | { | ||
84 | 2 | unsigned int frame_size = maximum_frame_size(elements); | |
85 | 2 | unsigned int frame_exp = log2_of_power_of_2(frame_size); | |
86 | |||
87 | 2 | sanitize_bytes(u, elements); | |
88 | 2 | clean_up_intermediate_values(llrs, u, frame_size, elements); | |
89 | 2 | generate_error_free_input_vector(llrs + frame_exp * frame_size, u, frame_size); | |
90 | |||
91 | 2 | unsigned int u_num = 0; | |
92 |
2/2✓ Branch 0 taken 8192 times.
✓ Branch 1 taken 2 times.
|
8194 | for (; u_num < frame_size; u_num++) { |
93 | 8192 | volk_32f_8u_polarbutterfly_32f_generic(llrs, u, frame_exp, 0, u_num, u_num); | |
94 | 8192 | u[u_num] = llrs[u_num] > 0 ? 0 : 1; | |
95 | } | ||
96 | |||
97 | 2 | clean_up_intermediate_values(llrs, u, frame_size, elements); | |
98 | 2 | } | |
99 | #endif /* LV_HAVE_GENERIC */ | ||
100 | |||
101 | #ifdef LV_HAVE_AVX | ||
102 | 2 | static inline void volk_32f_8u_polarbutterflypuppet_32f_u_avx(float* llrs, | |
103 | const float* input, | ||
104 | unsigned char* u, | ||
105 | const int elements) | ||
106 | { | ||
107 | 2 | unsigned int frame_size = maximum_frame_size(elements); | |
108 | 2 | unsigned int frame_exp = log2_of_power_of_2(frame_size); | |
109 | |||
110 | 2 | sanitize_bytes(u, elements); | |
111 | 2 | clean_up_intermediate_values(llrs, u, frame_size, elements); | |
112 | 2 | generate_error_free_input_vector(llrs + frame_exp * frame_size, u, frame_size); | |
113 | |||
114 | 2 | unsigned int u_num = 0; | |
115 |
2/2✓ Branch 0 taken 8192 times.
✓ Branch 1 taken 2 times.
|
8194 | for (; u_num < frame_size; u_num++) { |
116 | 8192 | volk_32f_8u_polarbutterfly_32f_u_avx(llrs, u, frame_exp, 0, u_num, u_num); | |
117 | 8192 | u[u_num] = llrs[u_num] > 0 ? 0 : 1; | |
118 | } | ||
119 | |||
120 | 2 | clean_up_intermediate_values(llrs, u, frame_size, elements); | |
121 | 2 | } | |
122 | #endif /* LV_HAVE_AVX */ | ||
123 | |||
124 | #ifdef LV_HAVE_AVX2 | ||
125 | 2 | static inline void volk_32f_8u_polarbutterflypuppet_32f_u_avx2(float* llrs, | |
126 | const float* input, | ||
127 | unsigned char* u, | ||
128 | const int elements) | ||
129 | { | ||
130 | 2 | unsigned int frame_size = maximum_frame_size(elements); | |
131 | 2 | unsigned int frame_exp = log2_of_power_of_2(frame_size); | |
132 | |||
133 | 2 | sanitize_bytes(u, elements); | |
134 | 2 | clean_up_intermediate_values(llrs, u, frame_size, elements); | |
135 | 2 | generate_error_free_input_vector(llrs + frame_exp * frame_size, u, frame_size); | |
136 | |||
137 | 2 | unsigned int u_num = 0; | |
138 |
2/2✓ Branch 0 taken 8192 times.
✓ Branch 1 taken 2 times.
|
8194 | for (; u_num < frame_size; u_num++) { |
139 | 8192 | volk_32f_8u_polarbutterfly_32f_u_avx2(llrs, u, frame_exp, 0, u_num, u_num); | |
140 | 8192 | u[u_num] = llrs[u_num] > 0 ? 0 : 1; | |
141 | } | ||
142 | |||
143 | 2 | clean_up_intermediate_values(llrs, u, frame_size, elements); | |
144 | 2 | } | |
145 | #endif /* LV_HAVE_AVX2 */ | ||
146 | |||
147 | |||
148 | #endif /* VOLK_KERNELS_VOLK_VOLK_32F_8U_POLARBUTTERFLYPUPPET_32F_H_ */ | ||
149 |