Line |
Branch |
Exec |
Source |
1 |
|
|
/* -*- c++ -*- */ |
2 |
|
|
/* |
3 |
|
|
* Copyright 2012, 2014 Free Software Foundation, Inc. |
4 |
|
|
* |
5 |
|
|
* This file is part of VOLK |
6 |
|
|
* |
7 |
|
|
* SPDX-License-Identifier: LGPL-3.0-or-later |
8 |
|
|
*/ |
9 |
|
|
|
10 |
|
|
/*! |
11 |
|
|
* \page volk_64u_popcnt |
12 |
|
|
* |
13 |
|
|
* \b Overview |
14 |
|
|
* |
15 |
|
|
* Computes the population count (popcnt), or Hamming weight, of a |
16 |
|
|
* binary string. This kernel takes in a single unsigned 64-bit value |
17 |
|
|
* and returns the count of 1's that the value contains. |
18 |
|
|
* |
19 |
|
|
* <b>Dispatcher Prototype</b> |
20 |
|
|
* \code |
21 |
|
|
* void volk_64u_popcnt(uint64_t* ret, const uint64_t value) |
22 |
|
|
* \endcode |
23 |
|
|
* |
24 |
|
|
* \b Inputs |
25 |
|
|
* \li value: The input value. |
26 |
|
|
* |
27 |
|
|
* \b Outputs |
28 |
|
|
* \li ret: The return value containing the popcnt. |
29 |
|
|
* |
30 |
|
|
* \b Example |
31 |
|
|
* \code |
32 |
|
|
* int N = 10; |
33 |
|
|
* unsigned int alignment = volk_get_alignment(); |
34 |
|
|
* |
35 |
|
|
* uint64_t bitstring[] = {0x0, 0x1, 0xf, 0xffffffffffffffff, |
36 |
|
|
* 0x5555555555555555, 0xaaaaaaaaaaaaaaaa, 0x2a2a2a2a2a2a2a2a, |
37 |
|
|
* 0xffffffff, 0x32, 0x64}; |
38 |
|
|
* uint64_t hamming_distance = 0; |
39 |
|
|
* |
40 |
|
|
* for(unsigned int ii=0; ii<N; ++ii){ |
41 |
|
|
* volk_64u_popcnt(&hamming_distance, bitstring[ii]); |
42 |
|
|
* printf("hamming distance of %" PRIx64 " = %" PRIu64 "\n", bitstring[ii], hamming_distance); |
43 |
|
|
* } |
44 |
|
|
* \endcode |
45 |
|
|
*/ |
46 |
|
|
|
47 |
|
|
#ifndef INCLUDED_volk_64u_popcnt_a_H |
48 |
|
|
#define INCLUDED_volk_64u_popcnt_a_H |
49 |
|
|
|
50 |
|
|
#include <inttypes.h> |
51 |
|
|
#include <stdio.h> |
52 |
|
|
|
53 |
|
|
|
54 |
|
|
#ifdef LV_HAVE_GENERIC |
55 |
|
|
|
56 |
|
|
|
57 |
|
262142 |
/*!
 * \brief Portable implementation: counts the set bits of a 64-bit value.
 *
 * Each 32-bit half of \p value is reduced with the same branch-free parallel
 * bit count (neighbouring 1-, 2-, 4-, 8- and 16-bit fields are summed in
 * turn), and the two partial counts are added together.
 *
 * \param ret   Output; receives the number of 1 bits in \p value (0..64).
 * \param value The 64-bit input word.
 */
static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
    uint64_t total = 0;

    /* Same reduction for the low half (shift 0) and the high half (shift 32). */
    for (unsigned int shift = 0; shift < 64; shift += 32) {
        /* The cast keeps only the low 32 bits of the shifted value. */
        uint32_t bits = (uint32_t)(value >> shift);

        bits = (bits & 0x55555555) + ((bits >> 1) & 0x55555555); /* 2-bit sums */
        bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333); /* 4-bit sums */
        bits = (bits + (bits >> 4)) & 0x0F0F0F0F;                /* per-byte sums */
        /* No mask needed here: each byte holds at most 16, so nothing carries. */
        bits = bits + (bits >> 8);
        /* The low byte now holds the full count (at most 32), which fits in 6 bits. */
        bits = (bits + (bits >> 16)) & 0x0000003F;

        total += bits;
    }

    *ret = total;
}
83 |
|
|
|
84 |
|
|
#endif /*LV_HAVE_GENERIC*/ |
85 |
|
|
|
86 |
|
|
|
87 |
|
|
#if LV_HAVE_SSE4_2 && LV_HAVE_64 |
88 |
|
|
|
89 |
|
|
#include <nmmintrin.h> |
90 |
|
|
|
91 |
|
262142 |
/*!
 * \brief SSE4.2 implementation: counts the set bits of a 64-bit value.
 *
 * Delegates the whole computation to the _mm_popcnt_u64 intrinsic.
 *
 * \param ret   Output; receives the number of 1 bits in \p value.
 * \param value The 64-bit input word.
 */
static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
{
    const uint64_t set_bits = _mm_popcnt_u64(value);

    *ret = set_bits;
}
95 |
|
|
|
96 |
|
|
#endif /*LV_HAVE_SSE4_2 && LV_HAVE_64*/ |
97 |
|
|
|
98 |
|
|
|
99 |
|
|
#if LV_HAVE_NEON |
100 |
|
|
#include <arm_neon.h> |
101 |
|
|
static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value) |
102 |
|
|
{ |
103 |
|
|
uint8x8_t input_val, count8x8_val; |
104 |
|
|
uint16x4_t count16x4_val; |
105 |
|
|
uint32x2_t count32x2_val; |
106 |
|
|
uint64x1_t count64x1_val; |
107 |
|
|
|
108 |
|
|
input_val = vld1_u8((unsigned char*)&value); |
109 |
|
|
count8x8_val = vcnt_u8(input_val); |
110 |
|
|
count16x4_val = vpaddl_u8(count8x8_val); |
111 |
|
|
count32x2_val = vpaddl_u16(count16x4_val); |
112 |
|
|
count64x1_val = vpaddl_u32(count32x2_val); |
113 |
|
|
vst1_u64(ret, count64x1_val); |
114 |
|
|
|
115 |
|
|
//*ret = _mm_popcnt_u64(value); |
116 |
|
|
} |
117 |
|
|
#endif /*LV_HAVE_NEON*/ |
118 |
|
|
|
119 |
|
|
|
120 |
|
|
#endif /*INCLUDED_volk_64u_popcnt_a_H*/ |
121 |
|
|
|