| Line | Branch | Exec | Source | 
|---|---|---|---|
| 1 | /* -*- c++ -*- */ | ||
| 2 | /* | ||
| 3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
| 4 | * | ||
| 5 | * This file is part of VOLK | ||
| 6 | * | ||
| 7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
| 8 | */ | ||
| 9 | |||
| 10 | /*! | ||
| 11 | * \page volk_64u_popcnt | ||
| 12 | * | ||
| 13 | * \b Overview | ||
| 14 | * | ||
| 15 | * Computes the population count (popcnt), i.e. the Hamming weight, of a | ||
| 16 | * binary string. This kernel takes in a single unsigned 64-bit value | ||
| 17 | * and returns the count of 1's that the value contains. | ||
| 18 | * | ||
| 19 | * <b>Dispatcher Prototype</b> | ||
| 20 | * \code | ||
| 21 | * void volk_64u_popcnt(uint64_t* ret, const uint64_t value) | ||
| 22 | * \endcode | ||
| 23 | * | ||
| 24 | * \b Inputs | ||
| 25 | * \li value: The input value. | ||
| 26 | * | ||
| 27 | * \b Outputs | ||
| 28 | * \li ret: The return value containing the popcnt. | ||
| 29 | * | ||
| 30 | * \b Example | ||
| 31 | * \code | ||
| 32 | * int N = 10; | ||
| 33 | * unsigned int alignment = volk_get_alignment(); | ||
| 34 | * | ||
| 35 | * uint64_t bitstring[] = {0x0, 0x1, 0xf, 0xffffffffffffffff, | ||
| 36 | * 0x5555555555555555, 0xaaaaaaaaaaaaaaaa, 0x2a2a2a2a2a2a2a2a, | ||
| 37 | * 0xffffffff, 0x32, 0x64}; | ||
| 38 | * uint64_t hamming_distance = 0; | ||
| 39 | * | ||
| 40 | * for(unsigned int ii=0; ii<N; ++ii){ | ||
| 41 | * volk_64u_popcnt(&hamming_distance, bitstring[ii]); | ||
| 42 | * printf("hamming distance of %" PRIx64 " = %" PRIu64 "\n", bitstring[ii], hamming_distance); | ||
| 43 | * } | ||
| 44 | * \endcode | ||
| 45 | */ | ||
| 46 | |||
| 47 | #ifndef INCLUDED_volk_64u_popcnt_a_H | ||
| 48 | #define INCLUDED_volk_64u_popcnt_a_H | ||
| 49 | |||
| 50 | #include <inttypes.h> | ||
| 51 | #include <stdio.h> | ||
| 52 | |||
| 53 | |||
| 54 | #ifdef LV_HAVE_GENERIC | ||
| 55 | |||
| 56 | |||
/*!
 * \brief Counts the 1 bits of a single 32-bit word without a lookup table.
 *
 * Neighbouring bit fields are summed in parallel: first pairs of bits, then
 * nibbles, then bytes. The last two folds add the four byte counts into the
 * low byte; the upper bytes then hold partial sums that the final mask
 * discards. A 32-bit word has at most 32 set bits, so six bits (0x3F) are
 * enough to hold the result.
 *
 * \param word The 32-bit word to count.
 * \return Number of set bits in \p word (0..32).
 */
static inline uint32_t volk_64u_popcnt_generic_word32(uint32_t word)
{
    word = (word & 0x55555555u) + ((word >> 1) & 0x55555555u);
    word = (word & 0x33333333u) + ((word >> 2) & 0x33333333u);
    word = (word + (word >> 4)) & 0x0F0F0F0Fu;
    word += word >> 8;
    word += word >> 16;
    return word & 0x0000003Fu;
}

/*!
 * \brief Portable population count of a 64-bit value.
 *
 * The value is split into its low and high 32-bit halves, each half is
 * counted independently, and the two counts are added.
 *
 * \param ret   Receives the number of set bits in \p value (0..64).
 * \param value The 64-bit word to count.
 */
static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
    const uint32_t low_half = (uint32_t)(value & 0x00000000FFFFFFFFull);
    const uint32_t high_half = (uint32_t)(value >> 32);

    *ret = (uint64_t)volk_64u_popcnt_generic_word32(low_half) +
           (uint64_t)volk_64u_popcnt_generic_word32(high_half);
}
| 83 | |||
| 84 | #endif /*LV_HAVE_GENERIC*/ | ||
| 85 | |||
| 86 | |||
| 87 | #if LV_HAVE_SSE4_2 && LV_HAVE_64 | ||
| 88 | |||
| 89 | #include <nmmintrin.h> | ||
| 90 | |||
/*!
 * \brief Population count of a 64-bit value via the _mm_popcnt_u64 intrinsic.
 *
 * \param ret   Receives the number of set bits in \p value.
 * \param value The 64-bit word to count.
 */
static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
{
    /* The intrinsic returns a signed 64-bit integer; the count is never
     * negative, so converting it to the unsigned output type is lossless. */
    const uint64_t set_bits = (uint64_t)_mm_popcnt_u64(value);

    *ret = set_bits;
}
| 95 | |||
| 96 | #endif /*LV_HAVE_SSE4_2*/ | ||
| 97 | |||
| 98 | |||
| 99 | #if LV_HAVE_NEON | ||
| 100 | #include <arm_neon.h> | ||
/*!
 * \brief Population count of a 64-bit value using NEON intrinsics.
 *
 * The eight bytes of \p value are counted individually, then the per-byte
 * counts are pairwise-added and widened three times (8 -> 16 -> 32 -> 64 bit
 * lanes) until a single 64-bit total remains.
 *
 * \param ret   Receives the number of set bits in \p value.
 * \param value The 64-bit word to count.
 */
static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value)
{
    /* View the value as eight bytes; the cast keeps the const qualifier. */
    const uint8x8_t value_bytes = vld1_u8((const uint8_t*)&value);

    /* Set-bit count of each byte. */
    const uint8x8_t bits_per_byte = vcnt_u8(value_bytes);

    /* Pairwise widening adds collapse eight counts into one. */
    const uint16x4_t bits_per_u16 = vpaddl_u8(bits_per_byte);
    const uint32x2_t bits_per_u32 = vpaddl_u16(bits_per_u16);
    const uint64x1_t bits_total = vpaddl_u32(bits_per_u32);

    vst1_u64(ret, bits_total);
}
| 117 | #endif /*LV_HAVE_NEON*/ | ||
| 118 | |||
| 119 | |||
| 120 | #endif /*INCLUDED_volk_64u_popcnt_a_H*/ | ||
| 121 |