/**
* Copyright (c) 2026 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include <cmath>
#include <limits>
#include <sstream>
#include "opdev/bfloat16.h"
#include "opdev/float6_e3m2.h"
#include "opdev/float6_e2m3.h"
#include "gtest/gtest.h"
// Fixture shared by the op::Float6E3M2 test cases. It holds no state and needs
// no per-test preparation, so the default testing::Test hooks are used as-is.
class TestFloat6E3M2 : public testing::Test {};
// A default-constructed Float6E3M2 must hold raw bits 0, report IsZero(), and
// convert to 0.0f.
TEST_F(TestFloat6E3M2, DefaultConstructor)
{
    op::Float6E3M2 defaulted;

    EXPECT_FLOAT_EQ(static_cast<float>(defaulted), 0.0f);
    EXPECT_TRUE(defaulted.IsZero());
    EXPECT_EQ(defaulted.value, 0);
}
// Builds Float6E3M2 values directly from raw bit patterns and checks the float
// each pattern is expected to decode to.
TEST_F(TestFloat6E3M2, FromBits)
{
    using E3M2 = op::Float6E3M2;

    // Both zero encodings (0x00 and 0x20) must be reported as zero.
    E3M2 plus_zero(0x00, E3M2::FromBits());
    E3M2 minus_zero(0x20, E3M2::FromBits());
    EXPECT_TRUE(plus_zero.IsZero());
    EXPECT_TRUE(minus_zero.IsZero());

    // The extreme patterns decode to finite +/-28 and are not NaN.
    E3M2 largest(0x1F, E3M2::FromBits());
    E3M2 smallest(0x3F, E3M2::FromBits());
    EXPECT_FLOAT_EQ(static_cast<float>(largest), 28.0f);
    EXPECT_FALSE(largest.IsNaN());
    EXPECT_FLOAT_EQ(static_cast<float>(smallest), -28.0f);
    EXPECT_FALSE(smallest.IsNaN());

    // A few interior patterns with known values.
    E3M2 unity(0x0C, E3M2::FromBits());
    E3M2 quarter(0x04, E3M2::FromBits());
    E3M2 twenty_four(0x1E, E3M2::FromBits());
    EXPECT_FLOAT_EQ(static_cast<float>(unity), 1.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(quarter), 0.25f);
    EXPECT_FLOAT_EQ(static_cast<float>(twenty_four), 24.0f);
}
// Round-trips several floats through Float6E3M2 and checks how a NaN input is
// handled.
TEST_F(TestFloat6E3M2, FloatConversion)
{
    using E3M2 = op::Float6E3M2;

    // These inputs are expected to come back unchanged.
    E3M2 unity(1.0f);
    E3M2 doubled(2.0f);
    E3M2 halved(0.5f);
    E3M2 minus_unity(-1.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(unity), 1.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(doubled), 2.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(halved), 0.5f);
    EXPECT_FLOAT_EQ(static_cast<float>(minus_unity), -1.0f);

    E3M2 from_zero(0.0f);
    EXPECT_TRUE(from_zero.IsZero());

    // A NaN input must not stay NaN; it is expected to read back as 28.0f.
    E3M2 from_nan(std::nanf(""));
    EXPECT_FALSE(from_nan.IsNaN());
    EXPECT_FLOAT_EQ(static_cast<float>(from_nan), 28.0f);
}
// Inputs beyond the range checked here (100 and +/-infinity) are expected to
// clamp to +/-28.
TEST_F(TestFloat6E3M2, OverflowClamp)
{
    const float infinity = std::numeric_limits<float>::infinity();

    op::Float6E3M2 too_big(100.0f);
    op::Float6E3M2 plus_inf(infinity);
    op::Float6E3M2 minus_inf(-infinity);

    EXPECT_FLOAT_EQ(static_cast<float>(too_big), 28.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(plus_inf), 28.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(minus_inf), -28.0f);
}
// Performs arithmetic in float on two Float6E3M2 operands and checks the result
// after converting it back to Float6E3M2.
TEST_F(TestFloat6E3M2, ArithmeticOperations)
{
    op::Float6E3M2 lhs(2.0f);
    op::Float6E3M2 rhs(3.0f);
    const float x = static_cast<float>(lhs);
    const float y = static_cast<float>(rhs);

    op::Float6E3M2 added(x + y);
    EXPECT_FLOAT_EQ(static_cast<float>(added), 5.0f);

    op::Float6E3M2 subtracted(x - y);
    EXPECT_FLOAT_EQ(static_cast<float>(subtracted), -1.0f);

    op::Float6E3M2 multiplied(x * y);
    EXPECT_FLOAT_EQ(static_cast<float>(multiplied), 6.0f);

    // 2/3 is only checked to within a tolerance of 0.1.
    op::Float6E3M2 divided(x / y);
    EXPECT_NEAR(static_cast<float>(divided), 0.6666667f, 0.1f);

    op::Float6E3M2 negated(-x);
    EXPECT_FLOAT_EQ(static_cast<float>(negated), -2.0f);
}
// Checks the relational operators on the float values of Float6E3M2 operands.
TEST_F(TestFloat6E3M2, ComparisonOperations)
{
    op::Float6E3M2 small(2.0f);
    op::Float6E3M2 big(4.0f);
    op::Float6E3M2 small_twin(2.0f);

    const float lo = static_cast<float>(small);
    const float hi = static_cast<float>(big);
    const float lo_again = static_cast<float>(small_twin);

    EXPECT_TRUE(lo < hi);
    EXPECT_TRUE(lo <= hi);
    EXPECT_TRUE(lo <= lo_again);
    EXPECT_TRUE(lo == lo_again);
    EXPECT_TRUE(lo != hi);
    EXPECT_TRUE(hi > lo);
    EXPECT_TRUE(hi >= lo);
}
// Emulates +=, -=, *= and /= by computing in float and assigning the result
// back as a Float6E3M2 after each step.
TEST_F(TestFloat6E3M2, CompoundAssignment)
{
    using E3M2 = op::Float6E3M2;
    E3M2 acc(2.0f);

    acc = E3M2(static_cast<float>(acc) + 3.0f);  // 2 + 3
    EXPECT_FLOAT_EQ(static_cast<float>(acc), 5.0f);

    acc = E3M2(static_cast<float>(acc) - 1.0f);  // 5 - 1
    EXPECT_FLOAT_EQ(static_cast<float>(acc), 4.0f);

    acc = E3M2(static_cast<float>(acc) * 2.0f);  // 4 * 2
    EXPECT_FLOAT_EQ(static_cast<float>(acc), 8.0f);

    acc = E3M2(static_cast<float>(acc) / 2.0f);  // 8 / 2
    EXPECT_FLOAT_EQ(static_cast<float>(acc), 4.0f);
}
// Checks implicit conversion of Float6E3M2 to double and float, plus zero and
// non-zero comparisons on the converted value.
TEST_F(TestFloat6E3M2, TypeConversion)
{
    op::Float6E3M2 three(3.0f);

    // Copy-initialization exercises the implicit conversions.
    double as_double = three;
    float as_float = three;
    EXPECT_DOUBLE_EQ(as_double, 3.0);
    EXPECT_FLOAT_EQ(as_float, 3.0f);
    EXPECT_TRUE(static_cast<float>(three) != 0.0f);

    op::Float6E3M2 nothing(0.0f);
    EXPECT_TRUE(static_cast<float>(nothing) == 0.0f);
}
// Covers construction from double, assignment from double, conversion back to
// double, and clamping of an out-of-range double.
TEST_F(TestFloat6E3M2, DoubleConversion)
{
    using E3M2 = op::Float6E3M2;

    E3M2 constructed(3.0);
    EXPECT_DOUBLE_EQ(static_cast<double>(constructed), 3.0);

    E3M2 assigned;
    assigned = 2.0;
    EXPECT_DOUBLE_EQ(static_cast<double>(assigned), 2.0);

    // Explicit cast and implicit conversion must agree.
    E3M2 four(4.0);
    EXPECT_DOUBLE_EQ(static_cast<double>(four), 4.0);
    double implicit_four = four;
    EXPECT_DOUBLE_EQ(implicit_four, 4.0);

    E3M2 minus_two(-2.0);
    EXPECT_DOUBLE_EQ(static_cast<double>(minus_two), -2.0);

    // 100.0 is expected to clamp to 28.0.
    E3M2 too_big(100.0);
    EXPECT_DOUBLE_EQ(static_cast<double>(too_big), 28.0);
}
// Applies std::isinf / std::isnan / std::isfinite / std::abs to Float6E3M2
// values, including one built from a NaN float.
TEST_F(TestFloat6E3M2, StdFunctions)
{
    op::Float6E3M2 four(4.0f);
    op::Float6E3M2 minus_four(-4.0f);
    op::Float6E3M2 from_nan(std::nanf(""));

    // An ordinary value is finite: neither infinite nor NaN.
    EXPECT_FALSE(std::isinf(four));
    EXPECT_FALSE(std::isnan(four));
    EXPECT_TRUE(std::isfinite(four));

    EXPECT_FLOAT_EQ(std::abs(static_cast<float>(minus_four)), 4.0f);

    // The NaN-constructed value is not NaN and reads back as 28.0f.
    EXPECT_FALSE(std::isnan(from_nan));
    EXPECT_FLOAT_EQ(static_cast<float>(from_nan), 28.0f);
}
// Checks the std::numeric_limits specialization for Float6E3M2.
TEST_F(TestFloat6E3M2, NumericLimits)
{
    using Limits = std::numeric_limits<op::Float6E3M2>;

    EXPECT_FLOAT_EQ(static_cast<float>(Limits::max()), 28.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(Limits::lowest()), -28.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(Limits::min()), 0.25f);
    // quiet_NaN() is expected to return a value that is not NaN.
    EXPECT_FALSE(std::isnan(Limits::quiet_NaN()));
}
// Streaming a Float6E3M2 holding 3.0 must produce the text "3".
TEST_F(TestFloat6E3M2, OutputStream)
{
    std::ostringstream sink;
    op::Float6E3M2 three(3.0f);

    sink << three;

    EXPECT_EQ(sink.str(), "3");
}
// Divides by zero in float and checks that converting the quotient to
// Float6E3M2 clamps to +/-28 and never yields NaN.
TEST_F(TestFloat6E3M2, DivisionByZero)
{
    using E3M2 = op::Float6E3M2;

    E3M2 zero(0.0f);
    const float denom = static_cast<float>(zero);

    // positive / 0
    E3M2 positive(4.0f);
    E3M2 pos_quotient(static_cast<float>(positive) / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(pos_quotient), 28.0f) << "Positive/zero should clamp to max (28.0)";
    EXPECT_FALSE(pos_quotient.IsNaN());

    // negative / 0
    E3M2 negative(-4.0f);
    E3M2 neg_quotient(static_cast<float>(negative) / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(neg_quotient), -28.0f) << "Negative/zero should clamp to min (-28.0)";
    EXPECT_FALSE(neg_quotient.IsNaN());

    // 0 / 0
    E3M2 indeterminate(denom / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(indeterminate), 28.0f) << "Zero/zero (NaN) should clamp to max";
    EXPECT_FALSE(indeterminate.IsNaN());

    // Same division, with the result assigned back to the dividend.
    E3M2 acc(8.0f);
    acc = E3M2(static_cast<float>(acc) / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(acc), 28.0f) << "Compound division by zero should clamp to max";
}
// Fixture shared by the op::Float6E2M3 test cases. It holds no state and needs
// no per-test preparation, so the default testing::Test hooks are used as-is.
class TestFloat6E2M3 : public testing::Test {};
// A default-constructed Float6E2M3 must hold raw bits 0, report IsZero(), and
// convert to 0.0f.
TEST_F(TestFloat6E2M3, DefaultConstructor)
{
    op::Float6E2M3 defaulted;

    EXPECT_FLOAT_EQ(static_cast<float>(defaulted), 0.0f);
    EXPECT_TRUE(defaulted.IsZero());
    EXPECT_EQ(defaulted.value, 0);
}
// Builds Float6E2M3 values directly from raw bit patterns and checks the float
// each pattern is expected to decode to.
TEST_F(TestFloat6E2M3, FromBits)
{
    using E2M3 = op::Float6E2M3;

    // Both zero encodings (0x00 and 0x20) must be reported as zero.
    E2M3 plus_zero(0x00, E2M3::FromBits());
    E2M3 minus_zero(0x20, E2M3::FromBits());
    EXPECT_TRUE(plus_zero.IsZero());
    EXPECT_TRUE(minus_zero.IsZero());

    // The extreme patterns decode to finite +/-7.5 and are not NaN.
    E2M3 largest(0x1F, E2M3::FromBits());
    E2M3 smallest(0x3F, E2M3::FromBits());
    EXPECT_FLOAT_EQ(static_cast<float>(largest), 7.5f);
    EXPECT_FALSE(largest.IsNaN());
    EXPECT_FLOAT_EQ(static_cast<float>(smallest), -7.5f);
    EXPECT_FALSE(smallest.IsNaN());

    // A few interior patterns with known values.
    E2M3 unity(0x08, E2M3::FromBits());
    E2M3 doubled(0x10, E2M3::FromBits());
    E2M3 halved(0x04, E2M3::FromBits());
    EXPECT_FLOAT_EQ(static_cast<float>(unity), 1.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(doubled), 2.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(halved), 0.5f);
}
// Round-trips several floats through Float6E2M3 and checks how a NaN input is
// handled.
TEST_F(TestFloat6E2M3, FloatConversion)
{
    using E2M3 = op::Float6E2M3;

    // These inputs are expected to come back unchanged.
    E2M3 unity(1.0f);
    E2M3 doubled(2.0f);
    E2M3 halved(0.5f);
    E2M3 minus_unity(-1.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(unity), 1.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(doubled), 2.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(halved), 0.5f);
    EXPECT_FLOAT_EQ(static_cast<float>(minus_unity), -1.0f);

    E2M3 from_zero(0.0f);
    EXPECT_TRUE(from_zero.IsZero());

    // A NaN input must not stay NaN; it is expected to read back as 7.5f.
    E2M3 from_nan(std::nanf(""));
    EXPECT_FALSE(from_nan.IsNaN());
    EXPECT_FLOAT_EQ(static_cast<float>(from_nan), 7.5f);
}
// Inputs beyond the range checked here (100 and +/-infinity) are expected to
// clamp to +/-7.5.
TEST_F(TestFloat6E2M3, OverflowClamp)
{
    const float infinity = std::numeric_limits<float>::infinity();

    op::Float6E2M3 too_big(100.0f);
    op::Float6E2M3 plus_inf(infinity);
    op::Float6E2M3 minus_inf(-infinity);

    EXPECT_FLOAT_EQ(static_cast<float>(too_big), 7.5f);
    EXPECT_FLOAT_EQ(static_cast<float>(plus_inf), 7.5f);
    EXPECT_FLOAT_EQ(static_cast<float>(minus_inf), -7.5f);
}
// Performs arithmetic in float on two Float6E2M3 operands and checks the result
// after converting it back to Float6E2M3.
TEST_F(TestFloat6E2M3, ArithmeticOperations)
{
    op::Float6E2M3 lhs(2.0f);
    op::Float6E2M3 rhs(3.0f);
    const float x = static_cast<float>(lhs);
    const float y = static_cast<float>(rhs);

    op::Float6E2M3 added(x + y);
    EXPECT_NEAR(static_cast<float>(added), 5.0f, 0.2f);

    op::Float6E2M3 subtracted(x - y);
    EXPECT_FLOAT_EQ(static_cast<float>(subtracted), -1.0f);

    op::Float6E2M3 multiplied(x * y);
    EXPECT_NEAR(static_cast<float>(multiplied), 6.0f, 0.2f);

    // 2/3 is only checked to within a tolerance of 0.1.
    op::Float6E2M3 divided(x / y);
    EXPECT_NEAR(static_cast<float>(divided), 0.6666667f, 0.1f);

    op::Float6E2M3 negated(-x);
    EXPECT_FLOAT_EQ(static_cast<float>(negated), -2.0f);
}
// Checks the relational operators on the float values of Float6E2M3 operands.
TEST_F(TestFloat6E2M3, ComparisonOperations)
{
    op::Float6E2M3 small(2.0f);
    op::Float6E2M3 big(4.0f);
    op::Float6E2M3 small_twin(2.0f);

    const float lo = static_cast<float>(small);
    const float hi = static_cast<float>(big);
    const float lo_again = static_cast<float>(small_twin);

    EXPECT_TRUE(lo < hi);
    EXPECT_TRUE(lo <= hi);
    EXPECT_TRUE(lo <= lo_again);
    EXPECT_TRUE(lo == lo_again);
    EXPECT_TRUE(lo != hi);
    EXPECT_TRUE(hi > lo);
    EXPECT_TRUE(hi >= lo);
}
// Emulates +=, -=, *= and /= by computing in float and assigning the result
// back as a Float6E2M3 after each step.
TEST_F(TestFloat6E2M3, CompoundAssignment)
{
    using E2M3 = op::Float6E2M3;
    E2M3 acc(2.0f);

    acc = E2M3(static_cast<float>(acc) + 3.0f);  // 2 + 3
    EXPECT_NEAR(static_cast<float>(acc), 5.0f, 0.2f);

    acc = E2M3(static_cast<float>(acc) - 1.0f);  // 5 - 1
    EXPECT_NEAR(static_cast<float>(acc), 4.0f, 0.2f);

    acc = E2M3(static_cast<float>(acc) * 1.5f);  // 4 * 1.5
    EXPECT_NEAR(static_cast<float>(acc), 6.0f, 0.3f);

    acc = E2M3(static_cast<float>(acc) / 2.0f);  // 6 / 2
    EXPECT_NEAR(static_cast<float>(acc), 3.0f, 0.2f);
}
// Checks implicit conversion of Float6E2M3 to double and float, plus zero and
// non-zero comparisons on the converted value.
TEST_F(TestFloat6E2M3, TypeConversion)
{
    op::Float6E2M3 three(3.0f);

    // Copy-initialization exercises the implicit conversions.
    double as_double = three;
    float as_float = three;
    EXPECT_DOUBLE_EQ(as_double, 3.0);
    EXPECT_FLOAT_EQ(as_float, 3.0f);
    EXPECT_TRUE(static_cast<float>(three) != 0.0f);

    op::Float6E2M3 nothing(0.0f);
    EXPECT_TRUE(static_cast<float>(nothing) == 0.0f);
}
// Covers construction from double, assignment from double, conversion back to
// double, and clamping of an out-of-range double.
TEST_F(TestFloat6E2M3, DoubleConversion)
{
    using E2M3 = op::Float6E2M3;

    E2M3 constructed(3.0);
    EXPECT_NEAR(static_cast<double>(constructed), 3.0, 0.2);

    E2M3 assigned;
    assigned = 2.0;
    EXPECT_DOUBLE_EQ(static_cast<double>(assigned), 2.0);

    // Explicit cast and implicit conversion are both checked against 4.0.
    E2M3 four(4.0);
    EXPECT_NEAR(static_cast<double>(four), 4.0, 0.2);
    double implicit_four = four;
    EXPECT_NEAR(implicit_four, 4.0, 0.2);

    E2M3 minus_two(-2.0);
    EXPECT_DOUBLE_EQ(static_cast<double>(minus_two), -2.0);

    // 100.0 is expected to clamp to 7.5.
    E2M3 too_big(100.0);
    EXPECT_DOUBLE_EQ(static_cast<double>(too_big), 7.5);
}
// Applies std::isinf / std::isnan / std::isfinite / std::abs to Float6E2M3
// values, including one built from a NaN float.
TEST_F(TestFloat6E2M3, StdFunctions)
{
    op::Float6E2M3 four(4.0f);
    op::Float6E2M3 minus_four(-4.0f);
    op::Float6E2M3 from_nan(std::nanf(""));

    // An ordinary value is finite: neither infinite nor NaN.
    EXPECT_FALSE(std::isinf(four));
    EXPECT_FALSE(std::isnan(four));
    EXPECT_TRUE(std::isfinite(four));

    EXPECT_FLOAT_EQ(std::abs(static_cast<float>(minus_four)), 4.0f);

    // The NaN-constructed value is not NaN and reads back as 7.5f.
    EXPECT_FALSE(std::isnan(from_nan));
    EXPECT_FLOAT_EQ(static_cast<float>(from_nan), 7.5f);
}
// Checks the std::numeric_limits specialization for Float6E2M3.
TEST_F(TestFloat6E2M3, NumericLimits)
{
    using Limits = std::numeric_limits<op::Float6E2M3>;

    EXPECT_FLOAT_EQ(static_cast<float>(Limits::max()), 7.5f);
    EXPECT_FLOAT_EQ(static_cast<float>(Limits::lowest()), -7.5f);
    EXPECT_FLOAT_EQ(static_cast<float>(Limits::min()), 1.0f);
    // quiet_NaN() is expected to return a value that is not NaN.
    EXPECT_FALSE(std::isnan(Limits::quiet_NaN()));
}
// Streaming a Float6E2M3 holding 3.0 must produce the text "3".
TEST_F(TestFloat6E2M3, OutputStream)
{
    std::ostringstream sink;
    op::Float6E2M3 three(3.0f);

    sink << three;

    EXPECT_EQ(sink.str(), "3");
}
// Divides by zero in float and checks that converting the quotient to
// Float6E2M3 clamps to +/-7.5 and never yields NaN.
TEST_F(TestFloat6E2M3, DivisionByZero)
{
    using E2M3 = op::Float6E2M3;

    E2M3 zero(0.0f);
    const float denom = static_cast<float>(zero);

    // positive / 0
    E2M3 positive(4.0f);
    E2M3 pos_quotient(static_cast<float>(positive) / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(pos_quotient), 7.5f) << "Positive/zero should clamp to max (7.5)";
    EXPECT_FALSE(pos_quotient.IsNaN());

    // negative / 0
    E2M3 negative(-4.0f);
    E2M3 neg_quotient(static_cast<float>(negative) / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(neg_quotient), -7.5f) << "Negative/zero should clamp to min (-7.5)";
    EXPECT_FALSE(neg_quotient.IsNaN());

    // 0 / 0
    E2M3 indeterminate(denom / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(indeterminate), 7.5f) << "Zero/zero (NaN) should clamp to max";
    EXPECT_FALSE(indeterminate.IsNaN());

    // Same division, with the result assigned back to the dividend.
    E2M3 acc(4.0f);
    acc = E2M3(static_cast<float>(acc) / denom);
    EXPECT_FLOAT_EQ(static_cast<float>(acc), 7.5f) << "Compound division by zero should clamp to max";
}
// Contrasts the ranges of the two formats: 20.0 is kept by E3M2 but clamps to
// 7.5 in E2M3, while 6.0 is kept by E2M3.
TEST(Float6CrossType, RangeComparison)
{
    op::Float6E3M2 wide_twenty(20.0f);
    op::Float6E2M3 narrow_six(6.0f);
    op::Float6E2M3 narrow_twenty(20.0f);

    EXPECT_FLOAT_EQ(static_cast<float>(wide_twenty), 20.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(narrow_six), 6.0f);
    EXPECT_FLOAT_EQ(static_cast<float>(narrow_twenty), 7.5f);
}
// Quantizes the same input (1.125f) to both 6-bit formats and compares how
// closely each reproduces it.
//
// The original test constructed the E3M2 value but never asserted on it, so
// nothing was actually compared. The E3M2 result is now bounded and the E2M3
// error is required to be no larger than the E3M2 error.
TEST(Float6CrossType, PrecisionComparison)
{
    float test_val = 1.125f;
    op::Float6E3M2 e3m2(test_val);
    op::Float6E2M3 e2m3(test_val);

    const float e3m2_error = std::fabs(static_cast<float>(e3m2) - test_val);
    const float e2m3_error = std::fabs(static_cast<float>(e2m3) - test_val);

    // E2M3 is expected to reproduce 1.125 to within 0.01.
    EXPECT_NEAR(static_cast<float>(e2m3), 1.125f, 0.01f);

    // NOTE(review): assumes the E3M2 neighbours of 1.125 are 1.0 and 1.25
    // (step 0.25 just above 1.0, per the OCP MX FP6 layout) - confirm. Under
    // that assumption either rounding direction is exactly 0.125 away.
    EXPECT_NEAR(static_cast<float>(e3m2), test_val, 0.125f);

    // The format with more mantissa bits must not be less accurate here.
    EXPECT_LE(e2m3_error, e3m2_error);
}
// Checks the low six bits of the encoding of 1.0f in each format: 0x0C for
// E3M2 and 0x08 for E2M3.
TEST(Float6CrossType, BitPatterns)
{
    op::Float6E3M2 unity_e3m2(1.0f);
    op::Float6E2M3 unity_e2m3(1.0f);

    // The 0x3F mask keeps only the six low bits of the stored value.
    EXPECT_EQ(unity_e3m2.value & 0x3F, 0x0C);
    EXPECT_EQ(unity_e2m3.value & 0x3F, 0x08);
}
// Builds a Float6E3M2 from an fp16_t holding 4.0 and checks the value to
// within 0.2.
TEST(Float6Fp16Conversion, E3M2FromFp16)
{
    op::fp16_t half_source(4.0f);

    op::Float6E3M2 narrowed(half_source);

    EXPECT_NEAR(static_cast<float>(narrowed), 4.0f, 0.2f);
}
// Converts a Float6E3M2 holding 8.0 to fp16_t via float and checks the value
// to within 0.5.
TEST(Float6Fp16Conversion, E3M2ToFp16)
{
    op::Float6E3M2 source(8.0f);
    const float widened = static_cast<float>(source);

    op::fp16_t half_result(widened);

    EXPECT_NEAR(static_cast<float>(half_result), 8.0f, 0.5f);
}
// Builds a Float6E2M3 from an fp16_t holding 3.0 and checks the value to
// within 0.2.
TEST(Float6Fp16Conversion, E2M3FromFp16)
{
    op::fp16_t half_source(3.0f);

    op::Float6E2M3 narrowed(half_source);

    EXPECT_NEAR(static_cast<float>(narrowed), 3.0f, 0.2f);
}
// Converts a Float6E2M3 holding 5.0 to fp16_t via float and checks that the
// fp16 value stays within 0.5 of the Float6E2M3 value.
TEST(Float6Fp16Conversion, E2M3ToFp16)
{
    op::Float6E2M3 source(5.0f);
    const float widened = static_cast<float>(source);

    op::fp16_t half_result(widened);

    EXPECT_NEAR(static_cast<float>(half_result), static_cast<float>(source), 0.5f);
}
// Builds a Float6E3M2 from a bfloat16 holding 6.0 and checks the value to
// within 0.3.
TEST(Float6BFloat16Conversion, E3M2FromBFloat16)
{
    op::bfloat16 brain_source(6.0f);

    op::Float6E3M2 narrowed(brain_source);

    EXPECT_NEAR(static_cast<float>(narrowed), 6.0f, 0.3f);
}
// Converts a Float6E3M2 holding 12.0 to bfloat16 via float and checks the
// value to within 0.5.
TEST(Float6BFloat16Conversion, E3M2ToBFloat16)
{
    op::Float6E3M2 source(12.0f);
    const float widened = static_cast<float>(source);

    op::bfloat16 brain_result(widened);

    EXPECT_NEAR(static_cast<float>(brain_result), 12.0f, 0.5f);
}
// Builds a Float6E2M3 from a bfloat16 holding 2.5 and checks the value to
// within 0.1.
TEST(Float6BFloat16Conversion, E2M3FromBFloat16)
{
    op::bfloat16 brain_source(2.5f);

    op::Float6E2M3 narrowed(brain_source);

    EXPECT_NEAR(static_cast<float>(narrowed), 2.5f, 0.1f);
}
// Converts a Float6E2M3 holding 4.0 to bfloat16 via float and checks that the
// bfloat16 value stays within 0.5 of the Float6E2M3 value.
TEST(Float6BFloat16Conversion, E2M3ToBFloat16)
{
    op::Float6E2M3 source(4.0f);
    const float widened = static_cast<float>(source);

    op::bfloat16 brain_result(widened);

    EXPECT_NEAR(static_cast<float>(brain_result), static_cast<float>(source), 0.5f);
}