namespace seqan3::detail
{

//!\brief Load 128 bits of integral data from memory (unaligned load via _mm_loadu_si128).
//!\param[in] mem_addr The memory address to load from; does not need to be aligned.
template <simd::simd_concept simd_t>
constexpr simd_t load_sse4(void const * mem_addr);

//!\brief Transpose the given byte matrix in place (one simd vector per row).
//!\param[in,out] matrix The matrix to transpose; length must equal max_length (byte scalar type).
template <simd::simd_concept simd_t>
inline void transpose_matrix_sse4(std::array<simd_t, simd_traits<simd_t>::length> & matrix);

//!\brief Upcast the elements of a signed simd vector into a vector with a larger element type
//!       (sign-extending _mm_cvtepi* intrinsics).
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_signed_sse4(source_simd_t const & src);

//!\brief Upcast the elements of an unsigned simd vector into a vector with a larger element type
//!       (zero-extending _mm_cvtepu* intrinsics).
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_unsigned_sse4(source_simd_t const & src);

//!\brief Extract the indexed half (8 bytes) of the vector, shifted down to the lowest lanes.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_half_sse4(simd_t const & src);

//!\brief Extract the indexed quarter (4 bytes) of the vector, shifted down to the lowest lanes.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_quarter_sse4(simd_t const & src);

//!\brief Extract the indexed eighth (2 bytes) of the vector, shifted down to the lowest lanes.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_eighth_sse4(simd_t const & src);

} // namespace seqan3::detail
78 namespace seqan3::detail
81 template <simd::simd_concept simd_t>
82 constexpr simd_t load_sse4(
void const * mem_addr)
84 return reinterpret_cast<simd_t
>(_mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(mem_addr)));
87 template <simd::simd_concept simd_t>
88 inline void transpose_matrix_sse4(
std::array<simd_t, simd_traits<simd_t>::length> & matrix)
90 static_assert(simd_traits<simd_t>::length == simd_traits<simd_t>::max_length,
"Expects byte scalar type.");
91 static_assert(is_native_builtin_simd_v<simd_t>,
"The passed simd vector is not a native SSE4 simd vector type.");
92 static_assert(is_builtin_simd_v<simd_t>,
"The passed simd vector is not a builtin vector type.");
96 constexpr
std::array<char, 16> bit_reverse{0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
106 for (
int i = 0; i < 8; ++i)
108 tmp1[i] = _mm_unpacklo_epi8(
reinterpret_cast<__m128i &
>(matrix[2*i]),
109 reinterpret_cast<__m128i &
>(matrix[2*i+1]));
110 tmp1[i+8] = _mm_unpackhi_epi8(
reinterpret_cast<__m128i &
>(matrix[2*i]),
111 reinterpret_cast<__m128i &
>(matrix[2*i+1]));
121 for (
int i = 0; i < 8; ++i)
123 tmp2[i] = _mm_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]);
124 tmp2[i+8] = _mm_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]);
138 for (
int i = 0; i < 8; ++i)
140 tmp1[i] = _mm_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]);
141 tmp1[i+8] = _mm_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]);
148 for (
int i = 0; i < 8; ++i)
150 matrix[bit_reverse[i]] =
reinterpret_cast<simd_t
>(_mm_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1]));
151 matrix[bit_reverse[i+8]] =
reinterpret_cast<simd_t
>(_mm_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1]));
155 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
156 constexpr target_simd_t upcast_signed_sse4(source_simd_t
const & src)
158 if constexpr (simd_traits<source_simd_t>::length == 16)
160 if constexpr (simd_traits<target_simd_t>::length == 8)
161 return reinterpret_cast<target_simd_t>(_mm_cvtepi8_epi16(reinterpret_cast<__m128i const &>(src)));
162 if constexpr (simd_traits<target_simd_t>::length == 4)
163 return reinterpret_cast<target_simd_t>(_mm_cvtepi8_epi32(reinterpret_cast<__m128i const &>(src)));
164 if constexpr (simd_traits<target_simd_t>::length == 2)
165 return reinterpret_cast<target_simd_t>(_mm_cvtepi8_epi64(reinterpret_cast<__m128i const &>(src)));
167 else if constexpr (simd_traits<source_simd_t>::length == 8)
169 if constexpr (simd_traits<target_simd_t>::length == 4)
170 return reinterpret_cast<target_simd_t>(_mm_cvtepi16_epi32(reinterpret_cast<__m128i const &>(src)));
171 if constexpr (simd_traits<target_simd_t>::length == 2)
172 return reinterpret_cast<target_simd_t>(_mm_cvtepi16_epi64(reinterpret_cast<__m128i const &>(src)));
176 static_assert(simd_traits<source_simd_t>::length == 4,
"Expected 32 bit scalar type.");
177 return reinterpret_cast<target_simd_t
>(_mm_cvtepi32_epi64(
reinterpret_cast<__m128i
const &
>(src)));
181 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
182 constexpr target_simd_t upcast_unsigned_sse4(source_simd_t
const & src)
184 if constexpr (simd_traits<source_simd_t>::length == 16)
186 if constexpr (simd_traits<target_simd_t>::length == 8)
187 return reinterpret_cast<target_simd_t>(_mm_cvtepu8_epi16(reinterpret_cast<__m128i const &>(src)));
188 if constexpr (simd_traits<target_simd_t>::length == 4)
189 return reinterpret_cast<target_simd_t>(_mm_cvtepu8_epi32(reinterpret_cast<__m128i const &>(src)));
190 if constexpr (simd_traits<target_simd_t>::length == 2)
191 return reinterpret_cast<target_simd_t>(_mm_cvtepu8_epi64(reinterpret_cast<__m128i const &>(src)));
193 else if constexpr (simd_traits<source_simd_t>::length == 8)
195 if constexpr (simd_traits<target_simd_t>::length == 4)
196 return reinterpret_cast<target_simd_t>(_mm_cvtepu16_epi32(reinterpret_cast<__m128i const &>(src)));
197 if constexpr (simd_traits<target_simd_t>::length == 2)
198 return reinterpret_cast<target_simd_t>(_mm_cvtepu16_epi64(reinterpret_cast<__m128i const &>(src)));
202 static_assert(simd_traits<source_simd_t>::length == 4,
"Expected 32 bit scalar type.");
203 return reinterpret_cast<target_simd_t
>(_mm_cvtepu32_epi64(
reinterpret_cast<__m128i
const &
>(src)));
207 template <u
int8_t index, simd::simd_concept simd_t>
208 constexpr simd_t extract_half_sse4(simd_t
const & src)
210 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), (index) << 3));
213 template <u
int8_t index, simd::simd_concept simd_t>
214 constexpr simd_t extract_quarter_sse4(simd_t
const & src)
216 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), index << 2));
219 template <u
int8_t index, simd::simd_concept simd_t>
220 constexpr simd_t extract_eighth_sse4(simd_t
const & src)
222 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), index << 1));
Provides seqan3::simd::simd_concept.
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits.
Provides intrinsics include for builtin simd.
Provides seqan3::simd::simd_traits.