#include "src/math/tan.h"
#include "hdr/errno_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h"
#include "src/__support/macros/properties/cpu_features.h"
#ifdef LIBC_TARGET_CPU_HAS_FMA
#include "range_reduction_double_fma.h"
#define FAST_PASS_EXPONENT 16
using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI;
using LIBC_NAMESPACE::fma::range_reduction_small;
using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128;
LIBC_INLINE constexpr bool NO_FMA = false;
#else
#include "range_reduction_double_nofma.h"
using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT;
using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI;
using LIBC_NAMESPACE::nofma::range_reduction_small;
using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128;
LIBC_INLINE constexpr bool NO_FMA = true;
#endif
#include "range_reduction_double_common.h"
#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
#define LIBC_MATH_TAN_SKIP_ACCURATE_PASS
#endif
namespace LIBC_NAMESPACE_DECL {
using DoubleDouble = fputil::DoubleDouble;
using Float128 = typename fputil::DyadicFloat<128>;
namespace {
LIBC_INLINE DoubleDouble tan_eval(const DoubleDouble &u) {
double u_hi_sq = u.hi * u.hi;
double p1 =
fputil::multiply_add(u_hi_sq, 0x1.664f4882c10fap-6, 0x1.ba1ba1ba1ba1cp-5);
double p2 =
fputil::multiply_add(u_hi_sq, 0x1.1111111111111p-3, 0x1.5555555555555p-2);
double q1 = fputil::multiply_add(u_hi_sq, 0x1.5555555555555p-1, 1.0);
double u_hi_3 = u_hi_sq * u.hi;
double u_hi_4 = u_hi_sq * u_hi_sq;
double p3 = fputil::multiply_add(u_hi_4, p1, p2);
double q2 = fputil::multiply_add(u_hi_sq, q1, 1.0);
double tan_lo = fputil::multiply_add(u_hi_3, p3, u.lo * q2);
return fputil::exact_add(u.hi, tan_lo);
}
[[maybe_unused]] Float128 tan_eval(const Float128 &u) {
Float128 u_sq = fputil::quick_mul(u, u);
constexpr Float128 TAN_COEFFS[] = {
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128},
{Sign::POS, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128},
{Sign::POS, -130, 0x88888888'88888888'88888888'88888889_u128},
{Sign::POS, -132, 0xdd0dd0dd'0dd0dd0d'd0dd0dd0'dd0dd0dd_u128},
{Sign::POS, -133, 0xb327a441'6087cf99'6b5dd24e'ec0b327a_u128},
{Sign::POS, -134,
0x91371aaf'3611e47a'da8e1cba'7d900eca_u128},
{Sign::POS, -136,
0xeb69e870'abeefdaf'e606d2e4'd1e65fbc_u128},
{Sign::POS, -137,
0xbed1b229'5baf15b5'0ec9af45'a2619971_u128},
{Sign::POS, -138,
0x9aac1240'1b3a2291'1b2ac7e3'e4627d0a_u128},
};
return fputil::quick_mul(
u, fputil::polyeval(u_sq, TAN_COEFFS[0], TAN_COEFFS[1], TAN_COEFFS[2],
TAN_COEFFS[3], TAN_COEFFS[4], TAN_COEFFS[5],
TAN_COEFFS[6], TAN_COEFFS[7], TAN_COEFFS[8]));
}
[[maybe_unused]] Float128 newton_raphson_div(const Float128 &a, Float128 b,
double q) {
Float128 q0(q);
constexpr Float128 TWO(2.0);
b.sign = (b.sign == Sign::POS) ? Sign::NEG : Sign::POS;
Float128 q1 =
fputil::quick_mul(q0, fputil::quick_add(TWO, fputil::quick_mul(b, q0)));
Float128 q2 =
fputil::quick_mul(q1, fputil::quick_add(TWO, fputil::quick_mul(b, q1)));
return fputil::quick_mul(a, q2);
}
}
LLVM_LIBC_FUNCTION(double, tan, (double x)) {
using FPBits = typename fputil::FPBits<double>;
FPBits xbits(x);
uint16_t x_e = xbits.get_biased_exponent();
DoubleDouble y;
unsigned k;
generic::LargeRangeReduction<NO_FMA> range_reduction_large{};
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) {
if (LIBC_UNLIKELY(x == 0.0))
return x;
#ifdef LIBC_TARGET_CPU_HAS_FMA
return fputil::multiply_add(x, 0x1.0p-54, x);
#else
if (LIBC_UNLIKELY(x_e < 4)) {
int rounding_mode = fputil::quick_get_round();
if (rounding_mode == FE_TOWARDZERO ||
(xbits.sign() == Sign::POS && rounding_mode == FE_DOWNWARD) ||
(xbits.sign() == Sign::NEG && rounding_mode == FE_UPWARD))
return FPBits(xbits.uintval() + 1).get_val();
}
return fputil::multiply_add(x, 0x1.0p-54, x);
#endif
}
k = range_reduction_small(x, y);
} else {
if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) {
if (xbits.get_mantissa() == 0) {
fputil::set_errno_if_required(EDOM);
fputil::raise_except_if_required(FE_INVALID);
}
return x + FPBits::quiet_nan().get_val();
}
k = range_reduction_large.compute_high_part(x);
y = range_reduction_large.fast();
}
DoubleDouble tan_y = tan_eval(y);
DoubleDouble msin_k = SIN_K_PI_OVER_128[(k + 128) & 255];
DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255];
DoubleDouble cos_k_tan_y = fputil::quick_mult<NO_FMA>(tan_y, cos_k);
DoubleDouble msin_k_tan_y = fputil::quick_mult<NO_FMA>(tan_y, msin_k);
DoubleDouble num_dd = fputil::exact_add<false>(cos_k_tan_y.hi, -msin_k.hi);
DoubleDouble den_dd = fputil::exact_add<false>(msin_k_tan_y.hi, cos_k.hi);
num_dd.lo += cos_k_tan_y.lo - msin_k.lo;
den_dd.lo += msin_k_tan_y.lo + cos_k.lo;
#ifdef LIBC_MATH_TAN_SKIP_ACCURATE_PASS
double tan_x = (num_dd.hi + num_dd.lo) / (den_dd.hi + den_dd.lo);
return tan_x;
#else
DoubleDouble tan_x = fputil::div(num_dd, den_dd);
constexpr int ERR = 64;
int y_exp = 7 + FPBits(y.hi).get_exponent();
int rel_err_exp = ERR + static_cast<int>((k & 63) == 0) * y_exp;
int64_t tan_x_err = static_cast<int64_t>(FPBits(tan_x.hi).uintval()) -
(static_cast<int64_t>(rel_err_exp) << 52);
double tan_err = FPBits(static_cast<uint64_t>(tan_x_err)).get_val();
double err_higher = tan_x.lo + tan_err;
double err_lower = tan_x.lo - tan_err;
double tan_upper = tan_x.hi + err_higher;
double tan_lower = tan_x.hi + err_lower;
if (LIBC_LIKELY(tan_upper == tan_lower))
return tan_upper;
Float128 u_f128;
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
u_f128 = generic::range_reduction_small_f128(x);
else
u_f128 = range_reduction_large.accurate();
Float128 tan_u = tan_eval(u_f128);
auto get_sin_k = [](unsigned kk) -> Float128 {
unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
Float128 ans = generic::SIN_K_PI_OVER_128_F128[idx];
if (kk & 128)
ans.sign = Sign::NEG;
return ans;
};
Float128 sin_k_f128 = get_sin_k(k);
Float128 cos_k_f128 = get_sin_k(k + 64);
Float128 msin_k_f128 = get_sin_k(k + 128);
Float128 num_f128 =
fputil::quick_add(sin_k_f128, fputil::quick_mul(cos_k_f128, tan_u));
Float128 den_f128 =
fputil::quick_add(cos_k_f128, fputil::quick_mul(msin_k_f128, tan_u));
Float128 result = newton_raphson_div(num_f128, den_f128, 1.0 / den_dd.hi);
return static_cast<double>(result);
#endif
}
}