#ifndef OMNI_INSIDE_INL_H
#define OMNI_INSIDE_INL_H
#if (!OMNI_HAVE_SCALABLE && !OMNI_TARGET_IS_SVE)
// Aggregate holding two vectors of the type VFromD<D>.
// Member order matters: Create2 brace-initializes the members positionally.
template <class D>
struct Vec2 {
    VFromD<D> v0; // element 0
    VFromD<D> v1; // element 1
};
// Aggregate holding three vectors of the type VFromD<D>.
// Member order matters: Create3 brace-initializes the members positionally.
template <class D>
struct Vec3 {
    VFromD<D> v0; // element 0
    VFromD<D> v1; // element 1
    VFromD<D> v2; // element 2
};
// Aggregate holding four vectors of the type VFromD<D>.
// Member order matters: Create4 brace-initializes the members positionally.
template <class D>
struct Vec4 {
    VFromD<D> v0; // element 0
    VFromD<D> v1; // element 1
    VFromD<D> v2; // element 2
    VFromD<D> v3; // element 3
};
// Bundles v0 and v1 into a Vec2<D>.
// The first argument is unnamed: only its type (the descriptor D) is used.
template <class D>
OMNI_API Vec2<D> Create2(D /* tag */, VFromD<D> v0, VFromD<D> v1)
{
    Vec2<D> tuple = { v0, v1 };
    return tuple;
}
// Bundles v0, v1 and v2 into a Vec3<D>.
// The first argument is unnamed: only its type (the descriptor D) is used.
template <class D>
OMNI_API Vec3<D> Create3(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2)
{
    Vec3<D> tuple = { v0, v1, v2 };
    return tuple;
}
// Bundles v0, v1, v2 and v3 into a Vec4<D>.
// The first argument is unnamed: only its type (the descriptor D) is used.
template <class D>
OMNI_API Vec4<D> Create4(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2, VFromD<D> v3)
{
    Vec4<D> tuple = { v0, v1, v2, v3 };
    return tuple;
}
// Returns element kIndex (0 or 1) of a Vec2<D>; any other index fails to compile.
template <size_t kIndex, class D>
OMNI_API VFromD<D> Get2(Vec2<D> tuple)
{
    static_assert(kIndex < 2, "Tuple index out of bounds");
    // kIndex is a compile-time constant, so only one return is ever taken.
    if (kIndex == 0) {
        return tuple.v0;
    }
    return tuple.v1;
}
// Returns element kIndex (0..2) of a Vec3<D>; any other index fails to compile.
template <size_t kIndex, class D>
OMNI_API VFromD<D> Get3(Vec3<D> tuple)
{
    static_assert(kIndex < 3, "Tuple index out of bounds");
    // kIndex is a compile-time constant, so only one return is ever taken.
    if (kIndex == 0) {
        return tuple.v0;
    }
    if (kIndex == 1) {
        return tuple.v1;
    }
    return tuple.v2;
}
// Returns element kIndex (0..3) of a Vec4<D>; any other index fails to compile.
template <size_t kIndex, class D>
OMNI_API VFromD<D> Get4(Vec4<D> tuple)
{
    static_assert(kIndex < 4, "Tuple index out of bounds");
    // kIndex is a compile-time constant, so only one return is ever taken.
    if (kIndex == 0) {
        return tuple.v0;
    }
    if (kIndex == 1) {
        return tuple.v1;
    }
    if (kIndex == 2) {
        return tuple.v2;
    }
    return tuple.v3;
}
// Returns a copy of `tuple` in which element kIndex (0 or 1) is replaced by `val`.
// The tuple is taken by value, so the caller's object is not modified.
template <size_t kIndex, class D>
OMNI_API Vec2<D> Set2(Vec2<D> tuple, VFromD<D> val)
{
    static_assert(kIndex < 2, "Tuple index out of bounds");
    if (kIndex != 0) {
        tuple.v1 = val;
    } else {
        tuple.v0 = val;
    }
    return tuple;
}
// Returns a copy of `tuple` in which element kIndex (0..2) is replaced by `val`.
// The tuple is taken by value, so the caller's object is not modified.
template <size_t kIndex, class D>
OMNI_API Vec3<D> Set3(Vec3<D> tuple, VFromD<D> val)
{
    static_assert(kIndex < 3, "Tuple index out of bounds");
    switch (kIndex) {
        case 0:
            tuple.v0 = val;
            break;
        case 1:
            tuple.v1 = val;
            break;
        default: // kIndex == 2, guaranteed by the static_assert
            tuple.v2 = val;
            break;
    }
    return tuple;
}
// Returns a copy of `tuple` in which element kIndex (0..3) is replaced by `val`.
// The tuple is taken by value, so the caller's object is not modified.
template <size_t kIndex, class D>
OMNI_API Vec4<D> Set4(Vec4<D> tuple, VFromD<D> val)
{
    static_assert(kIndex < 4, "Tuple index out of bounds");
    switch (kIndex) {
        case 0:
            tuple.v0 = val;
            break;
        case 1:
            tuple.v1 = val;
            break;
        case 2:
            tuple.v2 = val;
            break;
        default: // kIndex == 3, guaranteed by the static_assert
            tuple.v3 = val;
            break;
    }
    return tuple;
}
#endif
#if (defined(OMNI_NATIVE_ROL_ROR_8) == defined(OMNI_TARGET_TOGGLE))
#ifdef OMNI_NATIVE_ROL_ROR_8
#undef OMNI_NATIVE_ROL_ROR_8
#else
#define OMNI_NATIVE_ROL_ROR_8
#endif
// Rotate-left for vectors with 8-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a << (b & 7)) | (a >> (-b & 7)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI8(TFromV<V>)>
OMNI_API V Rol(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 3 bits of a count are kept.
    const auto count_mask = Set(du, uint8_t{ 7 });
    const auto left_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (8 - count) mod 8.
    const auto right_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right for vectors with 8-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a >> (b & 7)) | (a << (-b & 7)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI8(TFromV<V>)>
OMNI_API V Ror(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 3 bits of a count are kept.
    const auto count_mask = Set(du, uint8_t{ 7 });
    const auto right_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (8 - count) mod 8.
    const auto left_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#endif
#if (defined(OMNI_NATIVE_ROL_ROR_16) == defined(OMNI_TARGET_TOGGLE))
#ifdef OMNI_NATIVE_ROL_ROR_16
#undef OMNI_NATIVE_ROL_ROR_16
#else
#define OMNI_NATIVE_ROL_ROR_16
#endif
// Rotate-left for vectors with 16-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a << (b & 15)) | (a >> (-b & 15)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI16(TFromV<V>)>
OMNI_API V Rol(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 4 bits of a count are kept.
    const auto count_mask = Set(du, uint16_t{ 15 });
    const auto left_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (16 - count) mod 16.
    const auto right_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right for vectors with 16-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a >> (b & 15)) | (a << (-b & 15)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI16(TFromV<V>)>
OMNI_API V Ror(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 4 bits of a count are kept.
    const auto count_mask = Set(du, uint16_t{ 15 });
    const auto right_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (16 - count) mod 16.
    const auto left_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#endif
#if (defined(OMNI_NATIVE_ROL_ROR_32_64) == defined(OMNI_TARGET_TOGGLE))
#ifdef OMNI_NATIVE_ROL_ROR_32_64
#undef OMNI_NATIVE_ROL_ROR_32_64
#else
#define OMNI_NATIVE_ROL_ROR_32_64
#endif
// Rotate-left for vectors with 32-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a << (b & 31)) | (a >> (-b & 31)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI32(TFromV<V>)>
OMNI_API V Rol(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 5 bits of a count are kept.
    const auto count_mask = Set(du, uint32_t{ 31 });
    const auto left_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (32 - count) mod 32.
    const auto right_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right for vectors with 32-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a >> (b & 31)) | (a << (-b & 31)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI32(TFromV<V>)>
OMNI_API V Ror(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 5 bits of a count are kept.
    const auto count_mask = Set(du, uint32_t{ 31 });
    const auto right_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (32 - count) mod 32.
    const auto left_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#if OMNI_HAVE_INTEGER64
// Rotate-left for vectors with 64-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a << (b & 63)) | (a >> (-b & 63)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI64(TFromV<V>)>
OMNI_API V Rol(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 6 bits of a count are kept.
    const auto count_mask = Set(du, uint64_t{ 63 });
    const auto left_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (64 - count) mod 64.
    const auto right_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right for vectors with 64-bit integer lanes, composed from Shl, Shr and Or.
// Computes (a >> (b & 63)) | (a << (-b & 63)) on the unsigned reinterpretation of `a`.
// NOTE(review): assumes Shl/Shr shift each lane by the matching lane of the count vector - confirm.
template <class V, OMNI_IF_UI64(TFromV<V>)>
OMNI_API V Ror(V a, V b)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const RebindToSigned<decltype(d)> di;

    // Only the low 6 bits of a count are kept.
    const auto count_mask = Set(du, uint64_t{ 63 });
    const auto right_count = And(BitCast(du, b), count_mask);
    // The count is negated in the signed domain; masked, this is (64 - count) mod 64.
    const auto left_count = And(BitCast(du, Neg(BitCast(di, b))), count_mask);

    const auto bits = BitCast(du, a);
    const auto shifted_up = Shl(bits, left_count);
    const auto shifted_down = Shr(bits, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#endif
#endif
#if (defined(OMNI_NATIVE_ROL_ROR_SAME_8) == defined(OMNI_TARGET_TOGGLE))
#ifdef OMNI_NATIVE_ROL_ROR_SAME_8
#undef OMNI_NATIVE_ROL_ROR_SAME_8
#else
#define OMNI_NATIVE_ROL_ROR_SAME_8
#endif
// Rotate-left of 8-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 3 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI8(TFromV<V>)>
OMNI_API V RotateLeftSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int left_count = bits & 7;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int right_count = static_cast<int>(negated & 7u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right of 8-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 3 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI8(TFromV<V>)>
OMNI_API V RotateRightSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int right_count = bits & 7;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int left_count = static_cast<int>(negated & 7u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#endif
#if (defined(OMNI_NATIVE_ROL_ROR_SAME_16) == defined(OMNI_TARGET_TOGGLE))
#ifdef OMNI_NATIVE_ROL_ROR_SAME_16
#undef OMNI_NATIVE_ROL_ROR_SAME_16
#else
#define OMNI_NATIVE_ROL_ROR_SAME_16
#endif
// Rotate-left of 16-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 4 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI16(TFromV<V>)>
OMNI_API V RotateLeftSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int left_count = bits & 15;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int right_count = static_cast<int>(negated & 15u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right of 16-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 4 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI16(TFromV<V>)>
OMNI_API V RotateRightSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int right_count = bits & 15;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int left_count = static_cast<int>(negated & 15u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#endif
#if (defined(OMNI_NATIVE_ROL_ROR_SAME_32_64) == defined(OMNI_TARGET_TOGGLE))
#ifdef OMNI_NATIVE_ROL_ROR_SAME_32_64
#undef OMNI_NATIVE_ROL_ROR_SAME_32_64
#else
#define OMNI_NATIVE_ROL_ROR_SAME_32_64
#endif
// Rotate-left of 32-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 5 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI32(TFromV<V>)>
OMNI_API V RotateLeftSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int left_count = bits & 31;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int right_count = static_cast<int>(negated & 31u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right of 32-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 5 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI32(TFromV<V>)>
OMNI_API V RotateRightSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int right_count = bits & 31;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int left_count = static_cast<int>(negated & 31u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#if OMNI_HAVE_INTEGER64
// Rotate-left of 64-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 6 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI64(TFromV<V>)>
OMNI_API V RotateLeftSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int left_count = bits & 63;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int right_count = static_cast<int>(negated & 63u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
// Rotate-right of 64-bit integer lanes by one scalar count applied to the whole vector.
// Composed from ShiftLeftSame / ShiftRightSame on the unsigned reinterpretation of `v`;
// only the low 6 bits of `bits` are used, so any int (including negatives) is accepted.
template <class V, OMNI_IF_UI64(TFromV<V>)>
OMNI_API V RotateRightSame(V v, int bits)
{
    const DFromV<V> d;
    const RebindToUnsigned<decltype(d)> du;
    const auto raw = BitCast(du, v);

    const int right_count = bits & 63;
    // Negate in unsigned arithmetic so the operation is well defined for every int value.
    const unsigned negated = 0u - static_cast<unsigned>(bits);
    const int left_count = static_cast<int>(negated & 63u);

    const auto shifted_up = ShiftLeftSame(raw, left_count);
    const auto shifted_down = ShiftRightSame(raw, right_count);
    return BitCast(d, Or(shifted_up, shifted_down));
}
#endif
#endif
#if OMNI_TARGET != OMNI_SCALAR || OMNI_IDE
namespace detail {
// Signed-to-signed overload: widens the even-indexed narrow lanes of `v` into lanes of d_to.
// Tag arguments are (destination type, destination lane size in bytes, source type), as passed
// by the public PromoteEvenTo wrapper. kToLaneSize * 4 is therefore half the destination lane's bits.
template <size_t kToLaneSize, class D, class V>
OMNI_INLINE VFromD<D> PromoteEvenTo(simd::SignedTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, simd::SignedTag /* from_type_tag */, D d_to, V v)
{
    const auto wide = BitCast(d_to, v);
#if OMNI_TARGET_IS_SVE
    // SVE path: forward the already-bitcast wide vector to the target's helper.
    return NativePromoteEvenTo(wide);
#elif OMNI_IS_LITTLE_ENDIAN
    // Little-endian: move the low half of each wide lane into the high half, then shift it
    // back down (presumably an arithmetic shift for signed lanes, i.e. sign extension).
    return ShiftRight<kToLaneSize * 4>(ShiftLeft<kToLaneSize * 4>(wide));
#else
    // Big-endian: the wanted half already sits in the high half, so only the right shift is needed.
    return ShiftRight<kToLaneSize * 4>(wide);
#endif
}
// Unsigned-to-unsigned overload: widens the even-indexed narrow lanes of `v` into lanes of d_to.
// Tag arguments are (destination type, destination lane size in bytes, source type), as passed
// by the public PromoteEvenTo wrapper. kToLaneSize * 4 is therefore half the destination lane's bits.
template <size_t kToLaneSize, class D, class V>
OMNI_INLINE VFromD<D> PromoteEvenTo(simd::UnsignedTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, simd::UnsignedTag /* from_type_tag */, D d_to, V v)
{
    const auto wide = BitCast(d_to, v);
#if OMNI_TARGET_IS_SVE
    // SVE path: forward the already-bitcast wide vector to the target's helper.
    return NativePromoteEvenTo(wide);
#elif OMNI_IS_LITTLE_ENDIAN
    // Little-endian: keep only the low half of each wide lane by masking with the
    // largest value of the narrow lane type.
    const auto low_half_mask = Set(d_to, static_cast<TFromD<D>>(LimitsMax<TFromV<V>>()));
    return And(wide, low_half_mask);
#else
    // Big-endian: the wanted half sits in the high half, so shift it down.
    return ShiftRight<kToLaneSize * 4>(wide);
#endif
}
// Signed-to-signed overload: widens the odd-indexed narrow lanes of `v` into lanes of d_to.
// Tag arguments are (destination type, destination lane size in bytes, source type), as passed
// by the public PromoteOddTo wrapper. kToLaneSize * 4 is therefore half the destination lane's bits.
template <size_t kToLaneSize, class D, class V>
OMNI_INLINE VFromD<D> PromoteOddTo(simd::SignedTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, simd::SignedTag /* from_type_tag */, D d_to, V v)
{
    const auto wide = BitCast(d_to, v);
#if OMNI_IS_LITTLE_ENDIAN
    // Little-endian: the wanted half already sits in the high half of each wide lane.
    return ShiftRight<kToLaneSize * 4>(wide);
#else
    // Big-endian: first move the low half up, then shift it back down
    // (presumably an arithmetic shift for signed lanes, i.e. sign extension).
    return ShiftRight<kToLaneSize * 4>(ShiftLeft<kToLaneSize * 4>(wide));
#endif
}
// Unsigned-to-unsigned overload: widens the odd-indexed narrow lanes of `v` into lanes of d_to.
// Tag arguments are (destination type, destination lane size in bytes, source type), as passed
// by the public PromoteOddTo wrapper. kToLaneSize * 4 is therefore half the destination lane's bits.
template <size_t kToLaneSize, class D, class V>
OMNI_INLINE VFromD<D> PromoteOddTo(simd::UnsignedTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, simd::UnsignedTag /* from_type_tag */, D d_to, V v)
{
    const auto wide = BitCast(d_to, v);
#if OMNI_IS_LITTLE_ENDIAN
    // Little-endian: the wanted half sits in the high half of each wide lane, so shift it down.
    return ShiftRight<kToLaneSize * 4>(wide);
#else
    // Big-endian: keep only the low half by masking with the largest value of the narrow lane type.
    const auto low_half_mask = Set(d_to, static_cast<TFromD<D>>(LimitsMax<TFromV<V>>()));
    return And(wide, low_half_mask);
#endif
}
// Signed destination, unsigned source: delegates to the unsigned-to-unsigned overload on the
// unsigned counterpart of d_to, then reinterprets the result as the signed destination type.
template <size_t kToLaneSize, class D, class V>
OMNI_INLINE VFromD<D> PromoteEvenTo(simd::SignedTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, simd::UnsignedTag /* from_type_tag */, D d_to, V v)
{
    const RebindToUnsigned<decltype(d_to)> du_to;
    const auto promoted_unsigned =
        PromoteEvenTo(simd::UnsignedTag(), simd::SizeTag<kToLaneSize>(), simd::UnsignedTag(), du_to, v);
    return BitCast(d_to, promoted_unsigned);
}
// Signed destination, unsigned source: delegates to the unsigned-to-unsigned overload on the
// unsigned counterpart of d_to, then reinterprets the result as the signed destination type.
template <size_t kToLaneSize, class D, class V>
OMNI_INLINE VFromD<D> PromoteOddTo(simd::SignedTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, simd::UnsignedTag /* from_type_tag */, D d_to, V v)
{
    const RebindToUnsigned<decltype(d_to)> du_to;
    const auto promoted_unsigned =
        PromoteOddTo(simd::UnsignedTag(), simd::SizeTag<kToLaneSize>(), simd::UnsignedTag(), du_to, v);
    return BitCast(d_to, promoted_unsigned);
}
// Float destination with 4-byte lanes, enabled only when the lane type of `v` equals the lane
// type of a bfloat16_t repartition of DF32. Each even 16-bit source lane is placed into the
// upper 16 bits of a 32-bit lane, with the lower 16 bits cleared.
template <class FromTypeTag, class DF32, class VBF16,
    class VBF16_2 = VFromD<Repartition<bfloat16_t, DF32>>,
    simd::EnableIf<IsSame<TFromV<VBF16>, TFromV<VBF16_2>>()> * = nullptr>
OMNI_INLINE VFromD<DF32> PromoteEvenTo(simd::FloatTag /* to_type_tag */, simd::SizeTag<4> /* to_lane_size_tag */,
    FromTypeTag /* from_type_tag */, DF32 d_to, VBF16 v)
{
    const RebindToUnsigned<decltype(d_to)> du_to;
    const auto bits = BitCast(du_to, v);
#if OMNI_IS_LITTLE_ENDIAN
    // Little-endian: the wanted 16 bits are in the low half; shift them into the high half.
    return BitCast(d_to, ShiftLeft<16>(bits));
#else
    // Big-endian: the wanted 16 bits are already in the high half; clear the low half.
    return BitCast(d_to, And(bits, Set(du_to, uint32_t{ 0xFFFF0000u })));
#endif
}
// Float destination with 4-byte lanes, enabled only when the lane type of `v` equals the lane
// type of a bfloat16_t repartition of DF32. Each odd 16-bit source lane is placed into the
// upper 16 bits of a 32-bit lane, with the lower 16 bits cleared.
template <class FromTypeTag, class DF32, class VBF16,
    class VBF16_2 = VFromD<Repartition<bfloat16_t, DF32>>,
    simd::EnableIf<IsSame<TFromV<VBF16>, TFromV<VBF16_2>>()> * = nullptr>
OMNI_INLINE VFromD<DF32> PromoteOddTo(simd::FloatTag /* to_type_tag */, simd::SizeTag<4> /* to_lane_size_tag */,
    FromTypeTag /* from_type_tag */, DF32 d_to, VBF16 v)
{
    const RebindToUnsigned<decltype(d_to)> du_to;
    const auto bits = BitCast(du_to, v);
#if OMNI_IS_LITTLE_ENDIAN
    // Little-endian: the wanted 16 bits are already in the high half; clear the low half.
    return BitCast(d_to, And(bits, Set(du_to, uint32_t{ 0xFFFF0000u })));
#else
    // Big-endian: the wanted 16 bits are in the low half; shift them into the high half.
    return BitCast(d_to, ShiftLeft<16>(bits));
#endif
}
// Generic fallback for single-lane destinations: the only even lane is lane 0,
// so this simply forwards to PromoteLowerTo.
template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D, class V,
    OMNI_IF_LANES_D(D, 1)>
OMNI_INLINE VFromD<D> PromoteEvenTo(ToTypeTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */,
    FromTypeTag /* from_type_tag */,
    D d_to, V v)
{
    return PromoteLowerTo(d_to, v);
}
// Generic fallback for destinations with more than one lane: gathers the even lanes
// with ConcatEven(d, v, v), then promotes the lower part of that vector.
template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D, class V,
    OMNI_IF_LANES_GT_D(D, 1)>
OMNI_INLINE VFromD<D> PromoteEvenTo(ToTypeTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, FromTypeTag /* from_type_tag */, D d_to, V v)
{
    const DFromV<V> d_from;
    const auto even_lanes = ConcatEven(d_from, v, v);
    return PromoteLowerTo(d_to, even_lanes);
}
// Generic fallback: gathers the odd lanes with ConcatOdd(d, v, v),
// then promotes the lower part of that vector.
template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D, class V>
OMNI_INLINE VFromD<D> PromoteOddTo(ToTypeTag /* to_type_tag */,
    simd::SizeTag<kToLaneSize> /* to_lane_size_tag */, FromTypeTag /* from_type_tag */, D d_to, V v)
{
    const DFromV<V> d_from;
    const auto odd_lanes = ConcatOdd(d_from, v, v);
    return PromoteLowerTo(d_to, odd_lanes);
}
}
// Public entry point: promotes the even lanes of `v` to the lane type of `d`, which must be
// exactly twice as wide as the lanes of `v`. Dispatches to a detail:: overload selected by
// (destination type tag, destination lane size in bytes, source type tag).
template <class D, class V, OMNI_IF_T_SIZE_D(D, 2 * sizeof(TFromV<V>)),
    class V2 = VFromD<Repartition<TFromV<V>, D>>,
    OMNI_IF_LANES_D(DFromV<V>, OMNI_MAX_LANES_V(V2))>
OMNI_API VFromD<D> PromoteEvenTo(D d, V v)
{
    using TTo = TFromD<D>;
    using TFrom = TFromV<V>;
    return detail::PromoteEvenTo(simd::TypeTag<TTo>(), simd::SizeTag<sizeof(TTo)>(), simd::TypeTag<TFrom>(), d, v);
}
// Public entry point: promotes the odd lanes of `v` to the lane type of `d`, which must be
// exactly twice as wide as the lanes of `v`. Dispatches to a detail:: overload selected by
// (destination type tag, destination lane size in bytes, source type tag).
template <class D, class V, OMNI_IF_T_SIZE_D(D, 2 * sizeof(TFromV<V>)),
    class V2 = VFromD<Repartition<TFromV<V>, D>>,
    OMNI_IF_LANES_D(DFromV<V>, OMNI_MAX_LANES_V(V2))>
OMNI_API VFromD<D> PromoteOddTo(D d, V v)
{
    using TTo = TFromD<D>;
    using TFrom = TFromV<V>;
    return detail::PromoteOddTo(simd::TypeTag<TTo>(), simd::SizeTag<sizeof(TTo)>(), simd::TypeTag<TFrom>(), d, v);
}
#endif
#ifdef OMNI_INSIDE_END_NAMESPACE
#undef OMNI_INSIDE_END_NAMESPACE
}
OMNI_AFTER_NAMESPACE();
#endif
#endif