GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_16i_max_star_16i.h
Date:	2023-10-23 23:10:04

	Total	Coverage
Lines:	36	0.0%
Functions:	2	0.0%
Branches:	14	0.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_16i_max_star_16i
    
       *
    
       * \b Overview
    
       *
    
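       * Scans the input vector and writes to target[0] the element that wins the
       * wrap-around comparison (short)(a - b) > 0. This is the ordinary maximum as
       * long as the input values span less than 2^15.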
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_16i_max_star_16i(short* target, short* src0, unsigned int num_points);
    
       * \endcode
    
       *
    
       * \b Inputs
    
       * \li src0: The input vector.
    
       * \li num_points: The number of 16-bit integer values in src0.
    
       *
    
       * \b Outputs
    
       * \li target: The output value of the max* operation.
    
       *
    
       * \b Example
    
       * \code
    
       * int N = 10000;
    
       *
    
       * volk_16i_max_star_16i();
    
       *
    
       * volk_free(x);
    
       * volk_free(t);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_16i_max_star_16i_a_H
    
      #define INCLUDED_volk_16i_max_star_16i_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSSE3
    
      #include <emmintrin.h>
    
      #include <tmmintrin.h>
    
      #include <xmmintrin.h>
    
      /*!
       * \brief Aligned SSE implementation of the 16-bit max* reduction.
       *
       * Keeps eight running candidates (one per 16-bit lane), folds them into a
       * single scalar, then folds in the elements that did not fill a whole vector.
       * "a wins over b" means (short)(a - b) > 0, the same wrap-around comparison the
       * scalar tail loops use.
       *
       * \param target     Receives the winning value in target[0]. Left untouched
       *                   when num_points is 0.
       * \param src0       Input values; loaded with _mm_load_si128, so the buffer
       *                   must be 16-byte aligned.
       * \param num_points Number of 16-bit values in src0.
       */
      static inline void
      volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_points)
      {
          /* Nothing to reduce; reading src0[0] would be out of bounds. */
          if (num_points == 0) {
              return;
          }

          /* Derived from num_points directly: num_points * 2 can wrap around. */
          const unsigned int eighth_points = num_points / 8;
          const unsigned int leftovers = num_points % 8;

          const __m128i* p_src0 = (const __m128i*)src0;
          const __m128i zero = _mm_setzero_si128();
          short candidate = src0[0];
          short cands[8];
          unsigned int i;

          /* Seed every lane with the first element. Seeding with zero would let a
           * value that is not part of the input win for all-negative data. */
          __m128i best = _mm_set1_epi16(candidate);

          for (i = 0; i < eighth_points; ++i) {
              const __m128i in = _mm_load_si128(p_src0 + i);
              /* keep = all-ones in lanes where (short)(best - in) > 0 */
              const __m128i keep = _mm_cmpgt_epi16(_mm_sub_epi16(best, in), zero);
              best = _mm_or_si128(_mm_and_si128(keep, best), _mm_andnot_si128(keep, in));
          }

          /* cands is a plain stack array with no 16-byte alignment guarantee. */
          _mm_storeu_si128((__m128i*)cands, best);

          /* Horizontal reduction of the eight lane winners. */
          for (i = 0; i < 8; ++i) {
              candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
          }

          /* Elements past the last full vector. */
          for (i = 0; i < leftovers; ++i) {
              const short value = src0[eighth_points * 8 + i];
              candidate = ((short)(candidate - value) > 0) ? candidate : value;
          }

          target[0] = candidate;
      }
    
      #endif /*LV_HAVE_SSSE3*/
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      /*!
       * \brief NEON implementation of the 16-bit max* reduction.
       *
       * Keeps eight running candidates (one per 16-bit lane), folds them into a
       * single scalar, then folds in the elements that did not fill a whole vector.
       * A lane keeps its candidate when (candidate - input) >= 0 in wrap-around
       * 16-bit arithmetic, otherwise it takes the input.
       *
       * \param target     Receives the winning value in target[0]. Left untouched
       *                   when num_points is 0.
       * \param src0       Input values.
       * \param num_points Number of 16-bit values in src0.
       */
      static inline void
      volk_16i_max_star_16i_neon(short* target, short* src0, unsigned int num_points)
      {
          /* Nothing to reduce; reading src0[0] would be out of bounds. */
          if (num_points == 0) {
              return;
          }

          const unsigned int eighth_points = num_points / 8;
          const unsigned int leftovers = num_points % 8;
          const int16x8_t zeros = vdupq_n_s16(0);
          const short* in_ptr = src0;
          short lanes[8];
          short candidate = src0[0];
          unsigned int number;

          /* Every lane starts from the first input element. */
          int16x8_t candidate_vec = vld1q_dup_s16(src0);

          /* Start at element 0: skipping it would shift every load by one and make
           * the last access read src0[num_points]. */
          for (number = 0; number < eighth_points; ++number) {
              const int16x8_t input_vec = vld1q_s16(in_ptr);
              __VOLK_PREFETCH(in_ptr + 16);
              const int16x8_t diff = vsubq_s16(candidate_vec, input_vec);
              const uint16x8_t keep = vcgeq_s16(diff, zeros);
              /* Select candidate where keep is set, input elsewhere. */
              candidate_vec = vbslq_s16(keep, candidate_vec, input_vec);
              in_ptr += 8;
          }

          /* vst1q_s16 writes eight values, so it needs an eight-element buffer. */
          vst1q_s16(lanes, candidate_vec);

          /* Horizontal reduction of the eight lane winners. */
          for (number = 0; number < 8; ++number) {
              candidate =
                  ((short)(candidate - lanes[number]) > 0) ? candidate : lanes[number];
          }

          /* Elements past the last full vector. */
          for (number = 0; number < leftovers; ++number) {
              candidate =
                  ((short)(candidate - in_ptr[number]) > 0) ? candidate : in_ptr[number];
          }

          target[0] = candidate;
      }
    
      #endif /*LV_HAVE_NEON*/
    
      #ifdef LV_HAVE_GENERIC
    
      /*!
       * \brief Portable implementation of the 16-bit max* reduction.
       *
       * Walks the input once, replacing the running candidate whenever it does not
       * win the wrap-around comparison (short)(candidate - value) > 0.
       *
       * \param target     Receives the winning value in target[0]. Left untouched
       *                   when num_points is 0.
       * \param src0       Input values.
       * \param num_points Number of 16-bit values in src0.
       */
      static inline void
      volk_16i_max_star_16i_generic(short* target, short* src0, unsigned int num_points)
      {
          unsigned int i;
          short candidate;

          /* Nothing to reduce; reading src0[0] would be out of bounds. */
          if (num_points == 0) {
              return;
          }

          candidate = src0[0];

          /* Iterate on num_points directly: the former num_points * 2 round trip
           * wraps for large counts, and a signed index cannot cover the full range. */
          for (i = 1; i < num_points; ++i) {
              const short diff = (short)(candidate - src0[i]);
              if (diff <= 0) {
                  candidate = src0[i];
              }
          }

          target[0] = candidate;
      }
    
      #endif /*LV_HAVE_GENERIC*/
    
      #endif /*INCLUDED_volk_16i_max_star_16i_a_H*/

Line	Exec	Source
1		/* -*- c++ -*- */
2		/*
3		* Copyright 2012, 2014 Free Software Foundation, Inc.
4		*
5		* This file is part of VOLK
6		*
7		* SPDX-License-Identifier: LGPL-3.0-or-later
8		*/
9
10		/*!
11		* \page volk_16i_max_star_16i
12		*
13		* \b Overview
14		*
15		* Writes to target[0] the input element that wins the wrap-around comparison (short)(a - b) > 0.
16		*
17		* <b>Dispatcher Prototype</b>
18		* \code
19		* void volk_16i_max_star_16i(short* target, short* src0, unsigned int num_points);
20		* \endcode
21		*
22		* \b Inputs
23		* \li src0: The input vector.
24		* \li num_points: The number of 16-bit integer values in src0.
25		*
26		* \b Outputs
27		* \li target: The output value of the max* operation.
28		*
29		* \b Example
30		* \code
31		* int N = 10000;
32		*
33		* volk_16i_max_star_16i();
34		*
35		* volk_free(x);
36		* volk_free(t);
37		* \endcode
38		*/
39
40		#ifndef INCLUDED_volk_16i_max_star_16i_a_H
41		#define INCLUDED_volk_16i_max_star_16i_a_H
42
43		#include <inttypes.h>
44		#include <stdio.h>
45
46		#ifdef LV_HAVE_SSSE3
47
48		#include <emmintrin.h>
49		#include <tmmintrin.h>
50		#include <xmmintrin.h>
51
52		static inline void
53	✗	volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_points)
54		{
55	✗	const unsigned int num_bytes = num_points * 2;
56
57	✗	short candidate = src0[0];
58		short cands[8];
59		__m128i xmm0, xmm1, xmm3, xmm4, xmm5, xmm6;
60
61		__m128i* p_src0;
62
63	✗	p_src0 = (__m128i*)src0;
64
65	✗	int bound = num_bytes >> 4;
66	✗	int leftovers = (num_bytes >> 1) & 7;
67
68	✗	int i = 0;
69
70	✗	xmm1 = _mm_setzero_si128();
71	✗	xmm0 = _mm_setzero_si128();
72		//_mm_insert_epi16(xmm0, candidate, 0);
73
74	✗	xmm0 = _mm_shuffle_epi8(xmm0, xmm1);
75
76	✗	for (i = 0; i < bound; ++i) {
77	✗	xmm1 = _mm_load_si128(p_src0);
78	✗	p_src0 += 1;
79		// xmm2 = _mm_sub_epi16(xmm1, xmm0);
80
81	✗	xmm3 = _mm_cmpgt_epi16(xmm0, xmm1);
82	✗	xmm4 = _mm_cmpeq_epi16(xmm0, xmm1);
83	✗	xmm5 = _mm_cmpgt_epi16(xmm1, xmm0);
84
85	✗	xmm6 = _mm_xor_si128(xmm4, xmm5);
86
87	✗	xmm3 = _mm_and_si128(xmm3, xmm0);
88	✗	xmm4 = _mm_and_si128(xmm6, xmm1);
89
90	✗	xmm0 = _mm_add_epi16(xmm3, xmm4);
91		}
92
93		_mm_store_si128((__m128i*)cands, xmm0);
94
95	✗	for (i = 0; i < 8; ++i) {
96	✗	candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
97		}
98
99	✗	for (i = 0; i < leftovers; ++i) {
100	✗	candidate = ((short)(candidate - src0[(bound << 3) + i]) > 0)
101		? candidate
102	✗	: src0[(bound << 3) + i];
103		}
104
105	✗	target[0] = candidate;
106	✗	}
107
108		#endif /*LV_HAVE_SSSE3*/
109
110		#ifdef LV_HAVE_NEON
111		#include <arm_neon.h>
112
113		static inline void
114		volk_16i_max_star_16i_neon(short* target, short* src0, unsigned int num_points)
115		{
116		const unsigned int eighth_points = num_points / 8;
117		unsigned number;
118		int16x8_t input_vec;
119		int16x8_t diff, zeros;
120		uint16x8_t comp1, comp2;
121		zeros = vdupq_n_s16(0);
122
123		int16x8x2_t tmpvec;
124
125		int16x8_t candidate_vec = vld1q_dup_s16(src0);
126		short candidate;
127		++src0;
128
129		for (number = 0; number < eighth_points; ++number) {
130		input_vec = vld1q_s16(src0);
131		__VOLK_PREFETCH(src0 + 16);
132		diff = vsubq_s16(candidate_vec, input_vec);
133		comp1 = vcgeq_s16(diff, zeros);
134		comp2 = vcltq_s16(diff, zeros);
135
136		tmpvec.val[0] = vandq_s16(candidate_vec, (int16x8_t)comp1);
137		tmpvec.val[1] = vandq_s16(input_vec, (int16x8_t)comp2);
138
139		candidate_vec = vaddq_s16(tmpvec.val[0], tmpvec.val[1]);
140		src0 += 8;
141		}
142		vst1q_s16(&candidate, candidate_vec);
143
144		for (number = 0; number < num_points % 8; number++) {
145		candidate = ((int16_t)(candidate - src0[number]) > 0) ? candidate : src0[number];
146		}
147		target[0] = candidate;
148		}
149		#endif /*LV_HAVE_NEON*/
150
151		#ifdef LV_HAVE_GENERIC
152
153		static inline void
154	✗	volk_16i_max_star_16i_generic(short* target, short* src0, unsigned int num_points)
155		{
156	✗	const unsigned int num_bytes = num_points * 2;
157
158	✗	int i = 0;
159
160	✗	int bound = num_bytes >> 1;
161
162	✗	short candidate = src0[0];
163	✗	for (i = 1; i < bound; ++i) {
164	✗	candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
165		}
166	✗	target[0] = candidate;
167	✗	}
168
169		#endif /*LV_HAVE_GENERIC*/
170
171
172		#endif /*INCLUDED_volk_16i_max_star_16i_a_H*/
173