#ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
#define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
#include "numeric-templates.h"
#include "terminator.h"
#include "tools.h"
#include "flang/Runtime/cpp-type.h"
#include "flang/Runtime/descriptor.h"
#include <algorithm>
namespace Fortran::runtime {
template <typename TYPE, typename ACCUMULATOR>
inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
Terminator &terminator) {
if (dim < 0 || dim > 1) {
terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
intrinsic, dim, x.rank());
}
SubscriptValue xAt[maxRank];
x.GetLowerBounds(xAt);
if (mask) {
CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
if (mask->rank() > 0) {
SubscriptValue maskAt[maxRank];
mask->GetLowerBounds(maskAt);
for (auto elements{x.Elements()}; elements--;
x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
if (IsLogicalElementTrue(*mask, maskAt)) {
if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
break;
}
}
}
return;
} else if (!IsLogicalScalarTrue(*mask)) {
return;
}
}
for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
break;
}
}
}
template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
const char *source, int line, int dim, const Descriptor *mask,
ACCUMULATOR &&accumulator, const char *intrinsic) {
Terminator terminator{source, line};
RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
using CppType = CppTypeFor<CAT, KIND>;
DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
if constexpr (std::is_void_v<CppType>) {
#ifdef _MSC_VER
accumulator.GetResult();
#else
accumulator.template GetResult<CppType>();
#endif
} else {
CppType result;
#ifdef _MSC_VER
accumulator.GetResult(&result);
#else
accumulator.template GetResult<CppType>(&result);
#endif
return result;
}
}
inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
const Descriptor &descriptor, int zeroBasedDim,
const SubscriptValue from[]) {
descriptor.GetLowerBounds(at);
int rank{descriptor.rank()};
int j{0};
for (; j < zeroBasedDim; ++j) {
at[j] += from[j] - 1 ;
}
for (++j; j < rank; ++j) {
at[j] += from[j - 1] - 1;
}
}
template <typename TYPE, typename ACCUMULATOR>
inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
ACCUMULATOR &accumulator) {
SubscriptValue xAt[maxRank];
GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
const auto &dim{x.GetDimension(zeroBasedDim)};
SubscriptValue at{dim.LowerBound()};
for (auto n{dim.Extent()}; n-- > 0; ++at) {
xAt[zeroBasedDim] = at;
if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
break;
}
}
#ifdef _MSC_VER
accumulator.GetResult(result, zeroBasedDim);
#else
accumulator.template GetResult<TYPE>(result, zeroBasedDim);
#endif
}
template <typename TYPE, typename ACCUMULATOR>
inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
TYPE *result, ACCUMULATOR &accumulator) {
SubscriptValue xAt[maxRank], maskAt[maxRank];
GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
const auto &xDim{x.GetDimension(zeroBasedDim)};
SubscriptValue xPos{xDim.LowerBound()};
const auto &maskDim{mask.GetDimension(zeroBasedDim)};
SubscriptValue maskPos{maskDim.LowerBound()};
for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
++xPos, ++maskPos) {
maskAt[zeroBasedDim] = maskPos;
if (IsLogicalElementTrue(mask, maskAt)) {
xAt[zeroBasedDim] = xPos;
if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
break;
}
}
}
#ifdef _MSC_VER
accumulator.GetResult(result, zeroBasedDim);
#else
accumulator.template GetResult<TYPE>(result, zeroBasedDim);
#endif
}
template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
inline RT_API_ATTRS void PartialReduction(Descriptor &result,
const Descriptor &x, std::size_t resultElementSize, int dim,
const Descriptor *mask, Terminator &terminator, const char *intrinsic,
ACCUMULATOR &accumulator) {
CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
intrinsic, TypeCode{CAT, KIND});
SubscriptValue at[maxRank];
result.GetLowerBounds(at);
INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
using CppType = CppTypeFor<CAT, KIND>;
if (mask) {
CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
if (mask->rank() > 0) {
for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
accumulator.Reinitialize();
ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
}
return;
} else if (!IsLogicalScalarTrue(*mask)) {
accumulator.Reinitialize();
for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
accumulator.GetResult(result.Element<CppType>(at));
}
return;
}
}
for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
accumulator.Reinitialize();
ReduceDimToScalar<CppType, ACCUMULATOR>(
x, dim - 1, at, result.Element<CppType>(at), accumulator);
}
}
template <template <typename> class ACCUM>
struct PartialIntegerReductionHelper {
template <int KIND> struct Functor {
static constexpr int Intermediate{
std::max(KIND, 4)};
RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
int dim, const Descriptor *mask, Terminator &terminator,
const char *intrinsic) const {
using Accumulator =
ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
Accumulator accumulator{x};
PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
}
};
};
template <template <typename> class INTEGER_ACCUM>
inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
const Descriptor &x, int dim, int kind, const Descriptor *mask,
const char *intrinsic, Terminator &terminator) {
ApplyIntegerKind<
PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
kind, terminator, result, x, dim, mask, terminator, intrinsic);
}
template <TypeCategory CAT, template <typename> class ACCUM, int MIN_KIND>
struct PartialFloatingReductionHelper {
template <int KIND> struct Functor {
static constexpr int Intermediate{std::max(KIND, MIN_KIND)};
RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
int dim, const Descriptor *mask, Terminator &terminator,
const char *intrinsic) const {
using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
Accumulator accumulator{x};
PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
mask, terminator, intrinsic, accumulator);
}
};
};
template <template <typename> class INTEGER_ACCUM,
template <typename> class REAL_ACCUM,
template <typename> class COMPLEX_ACCUM, int MIN_REAL_KIND>
inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
const Descriptor &x, int dim, const char *source, int line,
const Descriptor *mask, const char *intrinsic) {
Terminator terminator{source, line};
auto catKind{x.type().GetCategoryAndKind()};
RUNTIME_CHECK(terminator, catKind.has_value());
switch (catKind->first) {
case TypeCategory::Integer:
PartialIntegerReduction<INTEGER_ACCUM>(
result, x, dim, catKind->second, mask, intrinsic, terminator);
break;
case TypeCategory::Real:
ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
REAL_ACCUM, MIN_REAL_KIND>::template Functor,
void>(catKind->second, terminator, result, x, dim, mask, terminator,
intrinsic);
break;
case TypeCategory::Complex:
ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
COMPLEX_ACCUM, MIN_REAL_KIND>::template Functor,
void>(catKind->second, terminator, result, x, dim, mask, terminator,
intrinsic);
break;
default:
terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
}
}
template <typename ACCUMULATOR> struct LocationResultHelper {
template <int KIND> struct Functor {
RT_API_ATTRS void operator()(
ACCUMULATOR &accumulator, const Descriptor &result) const {
accumulator.GetResult(
result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
}
};
};
template <typename ACCUMULATOR> struct PartialLocationHelper {
template <int KIND> struct Functor {
RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
int dim, const Descriptor *mask, Terminator &terminator,
const char *intrinsic, ACCUMULATOR &accumulator) const {
PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
terminator, intrinsic, accumulator);
}
};
};
RT_VAR_GROUP_BEGIN
static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind {
#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
16
#elif LDBL_MANT_DIG == 64
10
#else
8
#endif
};
RT_VAR_GROUP_END
template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
int dim, const Descriptor *mask, const char *intrinsic,
Terminator &terminator) {
using Type = CppTypeFor<CAT, KIND>;
ACCUMULATOR accumulator{x};
if (dim == 0 || x.rank() == 1) {
result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
CFI_attribute_allocatable);
if (int stat{result.Allocate()}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
accumulator.GetResult(result.OffsetElement<Type>());
} else {
PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim,
mask, terminator, intrinsic, accumulator);
}
}
template <int KIND>
using Norm2AccumType =
CppTypeFor<TypeCategory::Real, std::clamp(KIND, 8, Norm2LargestLDKind)>;
template <int KIND> class Norm2Accumulator {
public:
using Type = CppTypeFor<TypeCategory::Real, KIND>;
using AccumType = Norm2AccumType<KIND>;
explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
: array_{array} {}
RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; }
template <typename A>
RT_API_ATTRS void GetResult(A *p, int = -1) const {
*p = static_cast<Type>(max_ * SQRTTy<AccumType>::compute(1 + sum_));
}
RT_API_ATTRS bool Accumulate(Type x) {
auto absX{ABSTy<AccumType>::compute(static_cast<AccumType>(x))};
if (!max_) {
max_ = absX;
} else if (absX > max_) {
auto t{max_ / absX};
auto tsq{t * t};
sum_ *= tsq;
sum_ += tsq;
max_ = absX;
} else {
auto t{absX / max_};
sum_ += t * t;
}
return true;
}
template <typename A>
RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
return Accumulate(*array_.Element<A>(at));
}
private:
const Descriptor &array_;
AccumType max_{0};
AccumType sum_{0};
};
template <int KIND> struct Norm2Helper {
RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim,
const Descriptor *mask, Terminator &terminator) const {
DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>(
result, x, dim, mask, "NORM2", terminator);
}
};
}
#endif