#pragma once
#include <cmath>
#include <cstring>
#include <cstdint>
// Returns true when n is a positive power of two (1, 2, 4, 8, ...).
// Zero and negative values are rejected up front: 0 is not a power of two,
// and skipping the bit test for n <= 0 also avoids evaluating n - 1 on the
// most negative int, which would overflow.
inline bool isPowerOf2(int n) {
    // A power of two has exactly one bit set, so clearing the lowest set bit
    // (n & (n - 1)) must leave nothing behind.
    return n > 0 && (n & (n - 1)) == 0;
}
// Rounds v up to the nearest power of two; values that already are a power
// of two are returned unchanged. An input of 0, or anything above 0x80000000,
// wraps around and yields 0.
inline uint32_t RoundUpToPowerOf2(uint32_t v) {
    uint32_t smeared = v - 1;
    // Copy the highest set bit into every lower position (shifts 1, 2, 4, 8, 16).
    for (unsigned shift = 1; shift <= 16; shift <<= 1) {
        smeared |= smeared >> shift;
    }
    // All bits below the top one are now set, so adding one carries into the
    // next power of two.
    return smeared + 1;
}
// Rounds v up to the next multiple of `power`. The mask arithmetic only
// produces a multiple when `power` is itself a power of two.
inline uint32_t RoundUpToPowerOf2(uint32_t v, uint32_t power) {
    const uint32_t low_bits = power - 1;
    // Bump past the boundary, then drop the low bits.
    return (v + low_bits) & ~low_bits;
}
// Integer base-2 logarithm: returns the index of the highest set bit of val.
// For val == 0 no bit is set and the result is the all-ones value
// (i.e. -1 converted to unsigned).
inline uint32_t log2i(uint32_t val) {
    // Start one below zero so the first counted bit lands on index 0.
    unsigned int top_bit = ~0u;
    for (; val != 0; val >>= 1) {
        ++top_bit;
    }
    return top_bit;
}
// Pi as a single-precision (float) literal.
#define PI 3.141592653589793f
// Fallback definition of M_PI, used only when no earlier header has defined it.
// NOTE(review): this fallback is a float literal (f suffix); an M_PI supplied by
// another header may have a different type — confirm callers do not depend on
// which definition they end up with.
#ifndef M_PI
#define M_PI 3.141592653589793f
#endif
// Restricts val to the range [floor, cap] and returns the result by value.
// The upper bound is tested first, so if the bounds are inverted
// (floor > cap) any val above cap still yields cap.
template<class T>
inline T clamp_value(T val, T floor, T cap) {
    if (val > cap) {
        return cap;
    }
    return (val < floor) ? floor : val;
}
// Clamps x to the unit interval [0, 1]. A value for which neither comparison
// holds (including NaN) is returned unchanged.
inline float saturatef(float x) {
    if (x < 0.0f) {
        return 0.0f;
    }
    return (x > 1.0f) ? 1.0f : x;
}
// Rounds x up to the next multiple of a. The mask arithmetic only yields a
// multiple when a is a power of two. Note that `a` is expanded twice, so avoid
// passing an expression with side effects.
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
// Rounds x down to a multiple of a by clearing the low bits; as above, this
// relies on a being a power of two.
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
// In-place clamp: if *val lies outside [min, max] it is overwritten with the
// bound it violated. The lower bound is checked first.
template<class T>
inline void Clamp(T* val, const T& min, const T& max)
{
    T& target = *val;
    if (target < min) {
        target = min;
        return;
    }
    if (target > max) {
        target = max;
    }
}
// Value-returning clamp: copies val, clamps the copy with the in-place
// overload, and returns it.
template<class T>
inline T Clamp(const T val, const T& min, const T& max)
{
    T clamped = val;
    Clamp(&clamped, min, max);
    return clamped;
}
// Overlays a float with a 32-bit unsigned integer so the same storage can be
// accessed either as a number or as its raw bit pattern.
// `u` is declared first, so brace-initialisation (FP32 x = { ... }) sets the
// integer bit pattern, not the float value.
union FP32 {
uint32_t u;  // raw bits
float f;     // the same storage viewed as a float
};
// Holds the raw 16-bit pattern of a half-precision value. There is no
// arithmetic type behind it; the conversion helpers in this file operate on
// the bits directly.
struct FP16 {
uint16_t u;  // raw bits
};
// Returns true when f is positive or negative infinity, decided purely from
// the bit pattern (exponent field all ones, mantissa zero).
// The bits are extracted with memcpy rather than by reading the inactive
// member of a union, which is undefined behaviour in C++.
inline bool my_isinf(float f) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    // Ignore the sign bit; both infinities share the remaining pattern.
    return (bits & 0x7FFFFFFFu) == 0x7F800000u;
}
// Same test as the float version, but for a value already given as a raw
// 32-bit pattern: true for 0x7f800000 and 0xff800000 only.
inline bool my_isinf_u(uint32_t u) {
    // The two accepted patterns differ only in the top (sign) bit.
    const uint32_t without_sign = u & 0x7fffffffu;
    return without_sign == 0x7f800000u;
}
// Returns true when f is a NaN, decided purely from the bit pattern
// (exponent field all ones and a non-zero mantissa).
// The bits are extracted with memcpy rather than by reading the inactive
// member of a union, which is undefined behaviour in C++.
inline bool my_isnan(float f) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    const bool exponent_all_ones = (bits & 0x7F800000u) == 0x7F800000u;
    const bool mantissa_nonzero = (bits & 0x007FFFFFu) != 0;
    return exponent_all_ones && mantissa_nonzero;
}
// Returns true when f is either a NaN or an infinity, i.e. whenever the
// exponent field of its bit pattern is all ones.
// The bits are extracted with memcpy rather than by reading the inactive
// member of a union, which is undefined behaviour in C++.
inline bool my_isnanorinf(float f) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return (bits & 0x7F800000u) == 0x7F800000u;
}
// Replaces an infinite input (either sign) with 0.0f; every other value,
// including NaN, is passed through unchanged.
inline float InfToZero(float f) {
    if (my_isinf(f)) {
        return 0.0f;
    }
    return f;
}
// Returns 1 when d is an even whole number, 0 otherwise.
// Works by halving d, discarding the fractional part of the half, and
// checking that doubling it gives d back.
inline int is_even(float d) {
    float half_whole = 0.0f;
    std::modf(d / 2.0f, &half_whole);  // only the integral part is needed
    const bool even = (half_whole * 2.0f == d);
    return even ? 1 : 0;
}
// Rounds d to the nearest whole number, resolving exact .5 ties towards the
// even neighbour (round-half-to-even).
//
// The whole computation stays in double precision. Storing floor(d) in a
// float, or testing its parity through a float, loses the low bits once
// |d| exceeds 2^24 and gives wrong results for large inputs.
//
// Infinities and NaN are returned as themselves.
inline double round_ieee_754(double d) {
    const double whole = std::floor(d);
    const double frac = d - whole;  // in [0, 1) for finite d
    if (frac < 0.5)
        return whole;
    if (frac > 0.5)
        return whole + 1.0;
    // Exact tie: keep `whole` if it is even, otherwise step up to the even
    // neighbour. (Non-finite inputs also land here; whole + 1.0 leaves an
    // infinity or NaN unchanged.)
    const bool whole_is_even = std::fmod(whole, 2.0) == 0.0;
    return whole_is_even ? whole : whole + 1.0;
}
// Expands a 16-bit half-precision pattern into a 32-bit float pattern.
//
// The half's exponent and mantissa bits are moved into the float's field
// positions and the exponent bias is then corrected with a single float
// multiply. Inputs whose scaled value reaches 2^16 (the half Inf/NaN range)
// are rewritten with an all-ones float exponent and the half's 10 mantissa
// bits placed in the low bits of the float mantissa.
inline FP32 half_to_float_fast5(FP16 h)
{
    // Bit pattern of 2^112: shifts the exponent bias from 15 to 127.
    static const FP32 bias_fixup = { (127 + (127 - 15)) << 23 };
    // Bit pattern of 2^16: the smallest magnitude a half Inf/NaN scales to.
    static const FP32 infnan_limit = { (127 + 16) << 23 };

    FP32 result;
    result.u = (h.u & 0x7fff) << 13;   // exponent + mantissa into float position
    result.f *= bias_fixup.f;          // correct the exponent bias
    if (result.f >= infnan_limit.f) {
        // Keep Inf/NaN as Inf/NaN instead of a large finite number.
        result.u = (255 << 23) | (h.u & 0x03ff);
    }
    result.u |= (h.u & 0x8000) << 16;  // move the sign bit to bit 31
    return result;
}
inline float ExpandHalf(uint16_t half) {
FP16 fp16;
fp16.u = half;
FP32 fp = half_to_float_fast5(fp16);
return fp.f;
}
// Packs a 32-bit float pattern into a 16-bit half-precision pattern.
//
// The sign is peeled off first. NaN becomes a half NaN (0x7e00 plus the low
// 10 bits of the float pattern) and infinity becomes 0x7c00. Every other
// value has its exponent re-biased with a float multiply, is rounded, and is
// clamped to the half infinity pattern if it overflowed.
inline FP16 float_to_half_fast3(FP32 f)
{
    static const FP32 float_inf = { 255 << 23 };
    static const FP32 half_inf_shifted = { 31 << 23 };   // half infinity, before the >> 13
    static const FP32 bias_fixup = { 15 << 23 };         // bit pattern of 2^-112
    static const uint32_t sign_bit = 0x80000000u;
    static const uint32_t keep_mask = ~0xfffu;

    // Work on the magnitude; the sign is put back at the end.
    const uint32_t sign = f.u & sign_bit;
    f.u ^= sign;

    FP16 half = { 0 };
    if (f.u > float_inf.u) {
        // NaN: exponent all ones with a non-zero mantissa.
        half.u = 0x7e00 | (f.u & 0x3ff);
    } else if (f.u == float_inf.u) {
        // Infinity.
        half.u = 0x7c00;
    } else {
        f.u &= keep_mask;       // drop the lowest 12 mantissa bits
        f.f *= bias_fixup.f;    // shift the exponent bias from 127 to 15
        f.u -= keep_mask;       // modulo 2^32 this adds 0x1000, rounding at bit 13
        if (f.u > half_inf_shifted.u)
            f.u = half_inf_shifted.u;   // overflow saturates to infinity
        half.u = f.u >> 13;
    }

    half.u |= sign >> 16;       // sign moves from bit 31 to bit 15
    return half;
}
inline uint16_t ShrinkToHalf(float full) {
FP32 fp32;
fp32.f = full;
FP16 fp = float_to_half_fast3(fp32);
return fp.u;
}
// Declarations only; the definitions are not part of this header.
// NOTE(review): judging by the names, these configure the floating-point unit
// (flush-to-zero and a "fast" FPU mode respectively) — confirm against the
// implementation before relying on that.
void EnableFZ();
void FPU_SetFastMode();