CoCalc -- math

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Math/math_util.h
³¹⁸⁶ views
1
#pragma once
2

3
// Some of the code in this file consists of snippets from all over the web, esp. dspmusic.org. I think it's all public domain.
4
// In any case, very little of it is used anywhere at the moment.
5

6
#include <cmath>
7
#include <cstring>
8
#include <cstdint>
9

10
// Returns true only when n is a positive power of two (1, 2, 4, ...).
// Zero and negative values are rejected up front; besides giving the right
// answer for 0, this keeps `n - 1` from overflowing when n is the most
// negative int.
inline bool isPowerOf2(int n) {
	return n > 0 && (n & (n - 1)) == 0;
}
13

14
// Next power of 2.
// Smears the highest set bit of (v - 1) into every lower bit position and then
// adds one. A value that is already a power of two comes back unchanged.
// The arithmetic wraps modulo 2^32, so 0 and anything above 0x80000000 yield 0.
inline uint32_t RoundUpToPowerOf2(uint32_t v) {
	uint32_t filled = v - 1;
	for (int shift = 1; shift <= 16; shift *= 2) {
		filled |= filled >> shift;
	}
	return filled + 1;
}
25

26
// Rounds v up to the nearest multiple of `power`. The masking only produces a
// correct multiple when `power` is itself a power of two; that is not checked.
inline uint32_t RoundUpToPowerOf2(uint32_t v, uint32_t power) {
	const uint32_t low_bits = power - 1;
	return (v + low_bits) & ~low_bits;
}
29

30
// TODO: this should just use a bitscan.
// Integer base-2 logarithm: the index of the highest set bit of val.
// For val == 0 the counter never leaves its initial all-ones state, so the
// result is 0xFFFFFFFF.
inline uint32_t log2i(uint32_t val) {
	unsigned int highest_bit = -1;
	for (; val != 0; val >>= 1) {
		++highest_bit;
	}
	return highest_bit;
}
38

39
// Single-precision value of pi.
#define PI 3.141592653589793f
// Fallback for toolchains whose math headers do not provide M_PI.
// Note that this fallback is a float literal.
#if !defined(M_PI)
#define M_PI 3.141592653589793f
#endif
43

44
// Limits val to the range [floor, cap]. The upper bound is tested first, so
// when the bounds are given in the wrong order (floor > cap) any value above
// cap still yields cap.
template<class T>
inline T clamp_value(T val, T floor, T cap) {
	if (val > cap) {
		return cap;
	}
	return (val < floor) ? floor : val;
}
53

54
// Very common operation, familiar from shaders.
// Clamps x to [0, 1]. A NaN fails both comparisons and is returned unchanged.
inline float saturatef(float x) {
	if (x > 1.0f) {
		return 1.0f;
	}
	return (x < 0.0f) ? 0.0f : x;
}
60

61
// Alignment helpers. `a` has to be a power of two for the mask to be valid.
// ROUND_DOWN clears the low bits of x; ROUND_UP adds (a - 1) first and then
// does the same. Both are macros, so their arguments can be evaluated more
// than once.
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
#define ROUND_UP(x, a)   ROUND_DOWN((x) + (a) - 1, a)
63

64
// In-place clamp: overwrites *val with min when it is below min, or with max
// when it is above max. The lower bound is tested first. val is dereferenced
// unconditionally, so it has to point at a valid object.
template<class T>
inline void Clamp(T* val, const T& min, const T& max)
{
	T& target = *val;
	if (target < min) {
		target = min;
	} else if (target > max) {
		target = max;
	}
}
72

73
// Value-returning clamp: gives back a copy of val limited to [min, max].
// The lower bound is tested first; the arguments themselves are not modified.
template<class T>
inline T Clamp(const T val, const T& min, const T& max)
{
	T result = val;
	if (result < min) {
		result = min;
	} else if (result > max) {
		result = max;
	}
	return result;
}
80

81
// Overlays a float with a 32-bit integer so code can read or build the raw bit
// pattern of a single-precision value. `u` has to stay the first member: brace
// initializers in this file (`FP32 x = { bits }`) set the integer view.
union FP32 {
	uint32_t u;  // raw bit pattern
	float f;     // the same storage viewed as a float
};
85

86
// Holds the raw 16-bit pattern of a half-precision float. There is no native
// half type to overlay, so only the integer view exists.
struct FP16 {
	uint16_t u;  // raw bit pattern
};
89

90
// True when f is positive or negative infinity.
inline bool my_isinf(float f) {
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	// With the sign bit dropped, both infinities share one pattern:
	// all exponent bits set and a zero mantissa.
	return (bits & 0x7fffffffu) == 0x7f800000u;
}
96

97
// Same test as my_isinf, but for a value already given as its raw 32-bit
// pattern: true for 0x7f800000 (+inf) and 0xff800000 (-inf) only.
inline bool my_isinf_u(uint32_t u) {
	const uint32_t without_sign = u & 0x7fffffffu;
	return without_sign == 0x7f800000u;
}
100

101
// True when f is a NaN of either sign.
inline bool my_isnan(float f) {
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	// A NaN has every exponent bit set and a non-zero mantissa, so once the
	// sign bit is removed its pattern is strictly greater than infinity's.
	return (bits & 0x7fffffffu) > 0x7f800000u;
}
107

108
// True when f is an infinity or a NaN.
inline bool my_isnanorinf(float f) {
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	// Both kinds of value have all exponent bits set and differ only in the
	// mantissa, so the mantissa is simply ignored.
	const uint32_t exponent_mask = 0x7f800000u;
	return (bits & exponent_mask) == exponent_mask;
}
114

115
inline float InfToZero(float f) {
116
	return my_isinf(f) ? 0.0f : f;
117
}
118

119
// Returns 1 when d is an even whole number and 0 otherwise. Halving d and
// dropping the fraction only reproduces d after doubling if d was even, so
// odd and fractional values both give 0.
inline int is_even(float d) {
	const float half_whole = std::trunc(d / 2.0f);
	return (2.0f * half_whole == d) ? 1 : 0;
}
124

125
// Rounds *.5 to closest even number
// Returns d rounded to the nearest whole number; an exact .5 tie goes to the
// even neighbour. All arithmetic stays in double: narrowing the floor to float
// would lose integer precision above 2^24 and return the wrong whole number
// for large inputs.
inline double round_ieee_754(double d) {
	const double whole = std::floor(d);
	const double frac = d - whole;  // distance above the floor
	if (frac < 0.5)
		return whole;
	if (frac > 0.5)
		return whole + 1.0;
	// Exact tie. Non-finite inputs also land here because NaN fails both
	// comparisons above; fmod then yields NaN and the input propagates.
	if (std::fmod(whole, 2.0) == 0.0)
		return whole;
	return whole + 1.0;
}
137

138
// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
139
// See also SSE2 version: https://gist.github.com/rygorous/2144712
140
inline FP32 half_to_float_fast5(FP16 h)
141
{
142
	static const FP32 magic = { (127 + (127 - 15)) << 23 };
143
	static const FP32 was_infnan = { (127 + 16) << 23 };
144
	FP32 o;
145
	o.u = (h.u & 0x7fff) << 13;     // exponent/mantissa bits
146
	o.f *= magic.f;                 // exponent adjust
147
	if (o.f >= was_infnan.f)        // make sure Inf/NaN survive (retain the low bits)
148
		o.u = (255 << 23) | (h.u & 0x03ff);
149
	o.u |= (h.u & 0x8000) << 16;    // sign bit
150
	return o;
151
}
152

153
inline float ExpandHalf(uint16_t half) {
154
	FP16 fp16;
155
	fp16.u = half;
156
	FP32 fp = half_to_float_fast5(fp16);
157
	return fp.f;
158
}
159

160
// More magic code: https://gist.github.com/rygorous/2156668
161
inline FP16 float_to_half_fast3(FP32 f)
162
{
163
	static const FP32 f32infty = { 255 << 23 };
164
	static const FP32 f16infty = { 31 << 23 };
165
	static const FP32 magic = { 15 << 23 };
166
	static const uint32_t sign_mask = 0x80000000u;
167
	static const uint32_t round_mask = ~0xfffu;
168
	FP16 o = { 0 };
169

170
	uint32_t sign = f.u & sign_mask;
171
	f.u ^= sign;
172

173
	if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)
174
		o.u = (f.u > f32infty.u) ? (0x7e00 | (f.u & 0x3ff)) : 0x7c00; // NaN->qNaN and Inf->Inf
175
	else // (De)normalized number or zero
176
	{
177
		f.u &= round_mask;
178
		f.f *= magic.f;
179
		f.u -= round_mask;
180
		if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed
181

182
		o.u = f.u >> 13; // Take the bits!
183
	}
184

185
	o.u |= sign >> 16;
186
	return o;
187
}
188

189
inline uint16_t ShrinkToHalf(float full) {
190
	FP32 fp32;
191
	fp32.f = full;
192
	FP16 fp = float_to_half_fast3(fp32);
193
	return fp.u;
194
}
195

196
// FPU control.
197
// Declaration only; the implementation lives outside this header.
// NOTE(review): "FZ" presumably refers to the FPU's flush-to-zero mode - confirm against the definition.
void EnableFZ();
198

199
// Enable both FZ and Default-NaN. This is documented to switch some ARM implementations into a
// "run-fast" mode where they can schedule VFP instructions on the NEON unit (these
// implementations have very slow VFP units).
202
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
203
// Declaration only; the implementation lives outside this header.
void FPU_SetFastMode();
204

205
Product

Resources

Company