GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_64u_popcnt.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 20 20 100.0%
Functions: 2 2 100.0%
Branches: 0 0 -%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_64u_popcnt
12 *
13 * \b Overview
14 *
15 * Computes the population count (popcnt), also known as the Hamming
16 * weight, of a binary string. This kernel takes in a single unsigned
17 * 64-bit value and returns the count of 1's that the value contains.
18 *
19 * <b>Dispatcher Prototype</b>
20 * \code
21 * void volk_64u_popcnt(uint64_t* ret, const uint64_t value)
22 * \endcode
23 *
24 * \b Inputs
25 * \li value: The input value.
26 *
27 * \b Outputs
28 * \li ret: The return value containing the popcnt.
29 *
30 * \b Example
31 * \code
32 * int N = 10;
33 * unsigned int alignment = volk_get_alignment();
34 *
35 * uint64_t bitstring[] = {0x0, 0x1, 0xf, 0xffffffffffffffff,
36 * 0x5555555555555555, 0xaaaaaaaaaaaaaaaa, 0x2a2a2a2a2a2a2a2a,
37 * 0xffffffff, 0x32, 0x64};
38 * uint64_t hamming_distance = 0;
39 *
40 * for(unsigned int ii=0; ii<N; ++ii){
41 * volk_64u_popcnt(&hamming_distance, bitstring[ii]);
42 * printf("hamming distance of %" PRIx64 " = %" PRIu64 "\n", bitstring[ii], hamming_distance);
43 * }
44 * \endcode
45 */
46
47 #ifndef INCLUDED_volk_64u_popcnt_a_H
48 #define INCLUDED_volk_64u_popcnt_a_H
49
50 #include <inttypes.h>
51 #include <stdio.h>
52
53
54 #ifdef LV_HAVE_GENERIC
55
56
/*!
 * \brief Portable population count of a single 64-bit word.
 *
 * The input is processed as two independent 32-bit halves. Each half is
 * reduced with a parallel bit-summing sequence (bit pairs, nibbles, bytes,
 * then the whole half) and the two per-half counts are added together.
 *
 * \param ret   Output location; receives the number of set bits in \p value.
 * \param value The word whose set bits are counted.
 */
static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
    uint64_t total = 0;
    unsigned int shift;

    /* First pass covers bits 0..31, second pass covers bits 32..63. */
    for (shift = 0; shift < 64; shift += 32) {
        uint32_t bits = (uint32_t)((value >> shift) & 0xFFFFFFFFull);

        /* Sum adjacent bits into 2-bit fields, then 2-bit fields into nibbles. */
        bits = (bits & 0x55555555) + ((bits >> 1) & 0x55555555);
        bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);

        /* Fold neighbouring nibbles into per-byte counts (each at most 8). */
        bits = (bits + (bits >> 4)) & 0x0F0F0F0F;

        /* Accumulate the four byte counts into the lowest byte. */
        bits += bits >> 8;
        bits += bits >> 16;

        /* A 32-bit half has at most 32 set bits, so six bits hold the count. */
        total += bits & 0x0000003F;
    }

    *ret = total;
}
83
84 #endif /*LV_HAVE_GENERIC*/
85
86
87 #if LV_HAVE_SSE4_2 && LV_HAVE_64
88
89 #include <nmmintrin.h>
90
/*!
 * \brief Population count of a single 64-bit word via the
 *        _mm_popcnt_u64 intrinsic.
 *
 * \param ret   Output location; receives the intrinsic's result for \p value.
 * \param value The word passed to the intrinsic.
 */
static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
{
    const uint64_t set_bits = (uint64_t)_mm_popcnt_u64(value);

    *ret = set_bits;
}
95
96 #endif /*LV_HAVE_SSE4_2 && LV_HAVE_64*/
97
98
99 #if LV_HAVE_NEON
100 #include <arm_neon.h>
/*!
 * \brief Population count of a single 64-bit word using NEON intrinsics.
 *
 * The eight bytes of the input are loaded into a vector, the set bits of
 * each byte are counted with vcnt_u8, and the per-byte counts are combined
 * by successive pairwise widening additions until one 64-bit lane remains.
 *
 * \param ret   Output location; receives the resulting 64-bit lane.
 * \param value The word whose set bits are counted.
 */
static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value)
{
    /* Load the input's eight bytes and count the set bits in each one. */
    const uint8x8_t per_byte = vcnt_u8(vld1_u8((const uint8_t*)&value));

    /* Pairwise widening adds: 8 x u8 -> 4 x u16 -> 2 x u32 -> 1 x u64. */
    const uint16x4_t per_halfword = vpaddl_u8(per_byte);
    const uint32x2_t per_word = vpaddl_u16(per_halfword);
    const uint64x1_t whole = vpaddl_u32(per_word);

    vst1_u64(ret, whole);
}
117 #endif /*LV_HAVE_NEON*/
118
119
120 #endif /*INCLUDED_volk_64u_popcnt_a_H*/
121