* Copyright (c) 2026 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#ifndef OP_API_HIFP8_H
#define OP_API_HIFP8_H
#include <cstdint>
#include <iostream>
#include <limits>
namespace op {
/**
 * @brief One-byte HiFloat8 number held as its raw 8-bit encoding.
 *
 * The only data member is `value`. Conversions to and from float and the
 * classification predicates are declared here; their definitions live
 * outside this header.
 */
struct HiFloat8 {
    /// Empty tag type used to select the raw-bits constructor overload.
    struct FromBitsTag {};

    /// Encoding stored by the default constructor.
    static constexpr uint8_t HIF8_ZERO_VALUE = 0x00;
    /// Encoding used for NaN (only the top bit set).
    static constexpr uint8_t HIF8_NAN_VALUE = 0x80;
    /// Encoding used for infinity (0b01101111).
    static constexpr uint8_t HIF8_INF_VALUE = 0x6F;

    /// Raw 8-bit encoding of this number.
    uint8_t value;

    /// @return A tag instance to pass as the second constructor argument.
    static constexpr FromBitsTag FromBits() { return FromBitsTag{}; }

    /// Default-constructs with the HIF8_ZERO_VALUE encoding.
    constexpr HiFloat8() : value{HIF8_ZERO_VALUE} {}

    /**
     * @brief Wraps an existing encoding without any conversion.
     * @param bits     Encoding copied verbatim into `value`.
     * @param fromBits Overload-selection tag; its value is not read.
     */
    constexpr HiFloat8(uint8_t bits, [[maybe_unused]] FromBitsTag fromBits) : value{bits} {}

    /// Implicit construction from a float (defined out of line).
    HiFloat8(float f32);

    /// Implicit conversion to float (defined out of line).
    operator float() const;

    // Classification predicates (defined out of line).
    bool IsNaN() const;
    bool IsInf() const;
    bool IsZero() const;

    /// Produces an encoding from a 32-bit input (defined out of line).
    /// NOTE(review): presumably `f32` is the bit pattern of an IEEE-754 float - confirm.
    static uint8_t BitsFromFp32(uint32_t f32);

private:
    /// Internal HiFloat8 -> float helper (defined out of line).
    static float Hifp8ToFloat(HiFloat8 hif8);
};
/**
 * @brief Writes a HiFloat8 to a stream as the float it converts to.
 * @param os Destination stream.
 * @param dt Value to print.
 * @return The same stream, to allow chaining.
 */
inline std::ostream& operator<<(std::ostream& os, const HiFloat8& dt)
{
    const float asFloat = static_cast<float>(dt);
    return os << asFloat;
}
// The wrapper must add no storage beyond its single uint8_t member.
static_assert(sizeof(uint8_t) == sizeof(HiFloat8), "sizeof HiFloat8 must be 1");
}
namespace std {
// Classification helpers for op::HiFloat8; each one forwards to the type's own predicates.
// NOTE(review): the C++ standard only sanctions template specializations in namespace std,
// not new function overloads - kept here because callers spell these as std::isinf(x) etc.

/// @return true when `a` reports itself as infinite.
inline bool isinf(const op::HiFloat8& a)
{
    return a.IsInf();
}

/// @return true when `a` reports itself as NaN.
inline bool isnan(const op::HiFloat8& a)
{
    return a.IsNaN();
}

/// @return true when `a` is neither infinite nor NaN.
inline bool isfinite(const op::HiFloat8& a)
{
    return !(a.IsInf() || a.IsNaN());
}
/**
 * @brief std::numeric_limits specialization for op::HiFloat8.
 *
 * Every value-returning member builds its result from a raw encoding through
 * the FromBits constructor, so all of them are usable in constant expressions.
 *
 * NOTE(review): the decoded magnitudes quoted in the comments below follow the
 * published HiFloat8 layout (sign | dot | exponent | mantissa); confirm them
 * against the Hifp8ToFloat implementation.
 * NOTE(review): the integer traits (digits, max_exponent, ...) and
 * has_signaling_NaN are kept exactly as they were; they have not been
 * re-derived for this format.
 */
template <>
class numeric_limits<op::HiFloat8> {
public:
    static constexpr bool has_infinity = true;
    static constexpr bool has_quiet_NaN = true;
    static constexpr bool has_signaling_NaN = true;
    static constexpr bool is_bounded = true;
    static constexpr bool is_exact = false;
    static constexpr bool is_integer = false;
    static constexpr bool is_iec559 = false;
    static constexpr bool is_modulo = false;
    static constexpr bool is_signed = true;
    static constexpr bool is_specialized = true;
    static constexpr int digits = 3;
    static constexpr int digits10 = 0;
    static constexpr int max_digits10 = 2;
    static constexpr int min_exponent = -22;
    static constexpr int min_exponent10 = -6;
    static constexpr int max_exponent = 15;
    static constexpr int max_exponent10 = 4;
    static constexpr int radix = 2;

    /// Smallest positive normal value (2^-15).
    /// Previously 0b00001000, which is the encoding of 1.0.
    static constexpr op::HiFloat8 min()
    {
        return op::HiFloat8(0b01111110, op::HiFloat8::FromBits());
    }

    /// Most negative finite value: max() with the sign bit set (-2^15).
    /// Previously 0b11111111, a small-magnitude negative number.
    static constexpr op::HiFloat8 lowest()
    {
        return op::HiFloat8(0b11101110, op::HiFloat8::FromBits());
    }

    /// Largest finite value (2^15).
    /// Previously 0b01101111, which is HIF8_INF_VALUE, so max() compared equal to infinity().
    static constexpr op::HiFloat8 max()
    {
        return op::HiFloat8(0b01101110, op::HiFloat8::FromBits());
    }

    /// Gap between 1.0 (0b00001000) and the next encoding 0b00001001 (2^-3).
    /// Previously 0b00000001, the same bits as denorm_min().
    static constexpr op::HiFloat8 epsilon()
    {
        return op::HiFloat8(0b00111000, op::HiFloat8::FromBits());
    }

    /// Maximum rounding error (0.5).
    static constexpr op::HiFloat8 round_error()
    {
        return op::HiFloat8(0b00011000, op::HiFloat8::FromBits());
    }

    /// Positive infinity, taken from HiFloat8::HIF8_INF_VALUE.
    static constexpr op::HiFloat8 infinity()
    {
        return op::HiFloat8(op::HiFloat8::HIF8_INF_VALUE, op::HiFloat8::FromBits());
    }

    /// NaN, taken from HiFloat8::HIF8_NAN_VALUE.
    static constexpr op::HiFloat8 quiet_NaN()
    {
        return op::HiFloat8(op::HiFloat8::HIF8_NAN_VALUE, op::HiFloat8::FromBits());
    }

    /// Same encoding as quiet_NaN(); no distinct signaling pattern is used here.
    static constexpr op::HiFloat8 signaling_NaN()
    {
        return op::HiFloat8(op::HiFloat8::HIF8_NAN_VALUE, op::HiFloat8::FromBits());
    }

    /// Smallest positive subnormal value (2^-22).
    static constexpr op::HiFloat8 denorm_min()
    {
        return op::HiFloat8(0b00000001, op::HiFloat8::FromBits());
    }
};
}
#endif