Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/CPUDetect.cpp
3185 views
1
// Copyright (C) 2003 Dolphin Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official SVN repository and contact information can be found at
16
// http://code.google.com/p/dolphin-emu/
17
18
// Reference : https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set
19
#include "ppsspp_config.h"
20
#if (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !defined(__EMSCRIPTEN__)
21
22
#include "ext/cpu_features/include/cpuinfo_x86.h"
23
24
#if defined(CPU_FEATURES_OS_FREEBSD) || defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) || defined(CPU_FEATURES_OS_MACOS) || defined(CPU_FEATURES_OS_WINDOWS)
25
#define USE_CPU_FEATURES 1
26
#endif
27
28
#ifdef __ANDROID__
29
#include <sys/stat.h>
30
#include <fcntl.h>
31
#elif PPSSPP_PLATFORM(MAC)
32
#include <sys/sysctl.h>
33
#endif
34
35
#include <cstdint>
36
#include <memory.h>
37
#include <set>
38
#include <algorithm>
39
40
#include "Common/Common.h"
41
#include "Common/CPUDetect.h"
42
#include "Common/File/FileUtil.h"
43
#include "Common/StringUtils.h"
44
45
#if defined(_WIN32)
46
#include "Common/CommonWindows.h"
47
48
#define _interlockedbittestandset workaround_ms_header_bug_platform_sdk6_set
49
#define _interlockedbittestandreset workaround_ms_header_bug_platform_sdk6_reset
50
#define _interlockedbittestandset64 workaround_ms_header_bug_platform_sdk6_set64
51
#define _interlockedbittestandreset64 workaround_ms_header_bug_platform_sdk6_reset64
52
#include <intrin.h>
53
#undef _interlockedbittestandset
54
#undef _interlockedbittestandreset
55
#undef _interlockedbittestandset64
56
#undef _interlockedbittestandreset64
57
58
// Windows variant: runs CPUID for `cpuid_leaf` with `ecxval` as the sub-leaf
// through the __cpuidex compiler intrinsic, writing the four result registers
// into `regs`.
void do_cpuidex(u32 regs[4], u32 cpuid_leaf, u32 ecxval) {
	// The intrinsic takes a signed int array; the bit pattern is what matters.
	int *out = (int *)regs;
	__cpuidex(out, cpuid_leaf, ecxval);
}
61
// Windows variant: runs CPUID for `cpuid_leaf` through the __cpuid compiler
// intrinsic, writing the four result registers into `regs`.
void do_cpuid(u32 regs[4], u32 cpuid_leaf) {
	// The intrinsic takes a signed int array; the bit pattern is what matters.
	int *out = (int *)regs;
	__cpuid(out, cpuid_leaf);
}
64
65
#define do_xgetbv _xgetbv
66
67
#else // _WIN32
68
69
#ifdef _M_SSE
70
#include <emmintrin.h>
71
72
// Executes XGETBV with ECX = `index` and returns the 64-bit result, which the
// instruction delivers split across EDX (high half) and EAX (low half).
static uint64_t do_xgetbv(unsigned int index) {
	unsigned int lo, hi;
	__asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));
	const uint64_t high_half = (uint64_t)hi << 32;
	return high_half | lo;
}
77
#endif // _M_SSE
78
79
#if !PPSSPP_ARCH(MIPS)
80
81
// GCC/Clang variant: executes CPUID with EAX = `cpuid_leaf` and ECX = `ecxval`
// and stores the resulting EAX, EBX, ECX, EDX into regs[0..3] in that order.
void do_cpuidex(u32 regs[4], u32 cpuid_leaf, u32 ecxval) {
	u32 out_a, out_b, out_c, out_d;
#if defined(__i386__) && defined(__PIC__)
	// EBX is swapped with a scratch register around CPUID and swapped back
	// afterwards, so EBX itself is left with its original value.
	// NOTE(review): presumably because EBX is reserved in 32-bit PIC code - confirm.
	asm (
		"xchgl %%ebx, %1;\n\t"
		"cpuid;\n\t"
		"xchgl %%ebx, %1;\n\t"
		:"=a" (out_a), "=r" (out_b), "=c" (out_c), "=d" (out_d)
		:"a" (cpuid_leaf), "c" (ecxval));
#else
	asm (
		"cpuid;\n\t"
		:"=a" (out_a), "=b" (out_b), "=c" (out_c), "=d" (out_d)
		:"a" (cpuid_leaf), "c" (ecxval));
#endif
	regs[0] = out_a;
	regs[1] = out_b;
	regs[2] = out_c;
	regs[3] = out_d;
}
96
// GCC/Clang variant: plain CPUID query, implemented as do_cpuidex() with the
// sub-leaf (ECX) fixed to zero.
void do_cpuid(u32 regs[4], u32 cpuid_leaf) {
	const u32 subleaf = 0;
	do_cpuidex(regs, cpuid_leaf, subleaf);
}
100
101
#endif // !PPSSPP_ARCH(MIPS)
102
103
#endif // !win32
104
105
#ifndef _XCR_XFEATURE_ENABLED_MASK
106
#define _XCR_XFEATURE_ENABLED_MASK 0
107
#endif
108
109
// File-scope CPUInfo instance. The CPUInfo constructor calls Detect(), so the
// fields are filled in as soon as this object is constructed.
CPUInfo cpu_info;
110
111
// Constructing a CPUInfo immediately probes the CPU: all fields are populated
// by Detect().
CPUInfo::CPUInfo() {
	this->Detect();
}
114
115
#if PPSSPP_PLATFORM(LINUX)
116
static std::vector<int> ParseCPUList(const std::string &filename) {
117
std::string data;
118
std::vector<int> results;
119
120
if (File::ReadSysTextFileToString(Path(filename), &data)) {
121
std::vector<std::string> ranges;
122
SplitString(data, ',', ranges);
123
for (auto range : ranges) {
124
int low = 0, high = 0;
125
int parts = sscanf(range.c_str(), "%d-%d", &low, &high);
126
if (parts == 1) {
127
high = low;
128
}
129
for (int i = low; i <= high; ++i) {
130
results.push_back(i);
131
}
132
}
133
}
134
135
return results;
136
}
137
#endif
138
139
// Detects the various cpu features
140
void CPUInfo::Detect() {
141
#ifdef USE_CPU_FEATURES
142
cpu_features::X86Info info = cpu_features::GetX86Info();
143
#endif
144
145
memset(this, 0, sizeof(*this));
146
#if PPSSPP_ARCH(X86)
147
Mode64bit = false;
148
#elif PPSSPP_ARCH(AMD64)
149
Mode64bit = true;
150
OS64bit = true;
151
#endif
152
num_cores = 1;
153
154
#if PPSSPP_PLATFORM(UWP)
155
OS64bit = Mode64bit; // TODO: Not always accurate!
156
#elif defined(_WIN32) && PPSSPP_ARCH(X86)
157
BOOL f64 = false;
158
IsWow64Process(GetCurrentProcess(), &f64);
159
OS64bit = (f64 == TRUE) ? true : false;
160
#endif
161
// Set obvious defaults, for extra safety
162
if (Mode64bit) {
163
bSSE = true;
164
bSSE2 = true;
165
bLongMode = true;
166
}
167
168
// Assume CPU supports the CPUID instruction. Those that don't can barely
169
// boot modern OS:es anyway.
170
u32 cpu_id[4];
171
memset(cpu_string, 0, sizeof(cpu_string));
172
173
// Detect CPU's CPUID capabilities, and grab cpu string
174
do_cpuid(cpu_id, 0x00000000);
175
u32 max_std_fn = cpu_id[0]; // EAX
176
*((int *)cpu_string) = cpu_id[1];
177
*((int *)(cpu_string + 4)) = cpu_id[3];
178
*((int *)(cpu_string + 8)) = cpu_id[2];
179
do_cpuid(cpu_id, 0x80000000);
180
u32 max_ex_fn = cpu_id[0];
181
if (!strcmp(cpu_string, "GenuineIntel"))
182
vendor = VENDOR_INTEL;
183
else if (!strcmp(cpu_string, "AuthenticAMD"))
184
vendor = VENDOR_AMD;
185
else
186
vendor = VENDOR_OTHER;
187
188
// Set reasonable default brand string even if brand string not available.
189
#ifdef USE_CPU_FEATURES
190
if (info.brand_string[0])
191
strcpy(brand_string, info.brand_string);
192
else
193
#endif
194
strcpy(brand_string, cpu_string);
195
196
#ifdef USE_CPU_FEATURES
197
switch (cpu_features::GetX86Microarchitecture(&info)) {
198
case cpu_features::INTEL_ATOM_BNL:
199
case cpu_features::INTEL_ATOM_SMT:
200
case cpu_features::INTEL_ATOM_GMT:
201
case cpu_features::INTEL_ATOM_GMT_PLUS:
202
case cpu_features::INTEL_ATOM_TMT:
203
bAtom = true;
204
break;
205
default:
206
bAtom = false;
207
break;
208
}
209
210
bPOPCNT = info.features.popcnt;
211
bBMI1 = info.features.bmi1;
212
bBMI2 = info.features.bmi2;
213
bBMI2_fast = bBMI2 && (vendor != VENDOR_AMD || info.family >= 0x19);
214
bMOVBE = info.features.movbe;
215
bLZCNT = info.features.lzcnt;
216
bRTM = info.features.rtm;
217
218
bSSE = info.features.sse;
219
bSSE2 = info.features.sse2;
220
bSSE3 = info.features.sse3;
221
bSSSE3 = info.features.ssse3;
222
bSSE4_1 = info.features.sse4_1;
223
bSSE4_2 = info.features.sse4_2;
224
bSSE4A = info.features.sse4a;
225
bAES = info.features.aes;
226
bSHA = info.features.sha;
227
bF16C = info.features.f16c;
228
bAVX = info.features.avx;
229
bAVX2 = info.features.avx2;
230
bFMA3 = info.features.fma3;
231
bFMA4 = info.features.fma4;
232
#endif
233
234
// Detect family and other misc stuff.
235
bool ht = false;
236
HTT = ht;
237
logical_cpu_count = 1;
238
if (max_std_fn >= 1) {
239
do_cpuid(cpu_id, 0x00000001);
240
#ifndef USE_CPU_FEATURES
241
int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff);
242
int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0);
243
// Detect people unfortunate enough to be running PPSSPP on an Atom
244
if (family == 6 && (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||
245
model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))
246
bAtom = true;
247
#endif
248
249
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
250
ht = (cpu_id[3] >> 28) & 1;
251
252
#ifndef USE_CPU_FEATURES
253
if ((cpu_id[3] >> 25) & 1) bSSE = true;
254
if ((cpu_id[3] >> 26) & 1) bSSE2 = true;
255
if ((cpu_id[2]) & 1) bSSE3 = true;
256
if ((cpu_id[2] >> 9) & 1) bSSSE3 = true;
257
if ((cpu_id[2] >> 19) & 1) bSSE4_1 = true;
258
if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
259
if ((cpu_id[2] >> 28) & 1) {
260
bAVX = true;
261
if ((cpu_id[2] >> 12) & 1)
262
bFMA3 = true;
263
}
264
if ((cpu_id[2] >> 25) & 1) bAES = true;
265
#endif
266
267
if ((cpu_id[3] >> 24) & 1)
268
{
269
// We can use FXSAVE.
270
bFXSR = true;
271
}
272
273
#ifndef USE_CPU_FEATURES
274
// AVX support requires 3 separate checks:
275
// - Is the AVX bit set in CPUID? (>>28)
276
// - Is the XSAVE bit set in CPUID? ( >>26)
277
// - Is the OSXSAVE bit set in CPUID? ( >>27)
278
// - XGETBV result has the XCR bit set.
279
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1) && ((cpu_id[2] >> 26) & 1)) {
280
if ((do_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
281
bAVX = true;
282
if ((cpu_id[2] >> 12) & 1)
283
bFMA3 = true;
284
}
285
}
286
287
288
// TSX support require check:
289
// -- Is the RTM bit set in CPUID? (>>11)
290
// -- No need to check HLE bit because legacy processors ignore HLE hints
291
// -- See https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
292
if (max_std_fn >= 7)
293
{
294
do_cpuid(cpu_id, 0x00000007);
295
// careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed
296
if ((cpu_id[1] >> 5) & 1)
297
bAVX2 = bAVX;
298
if ((cpu_id[1] >> 3) & 1)
299
bBMI1 = true;
300
if ((cpu_id[1] >> 8) & 1)
301
bBMI2 = true;
302
if ((cpu_id[1] >> 29) & 1)
303
bSHA = true;
304
if ((cpu_id[1] >> 11) & 1)
305
bRTM = true;
306
}
307
308
bBMI2_fast = bBMI2 && (vendor != VENDOR_AMD || family >= 0x19);
309
#endif
310
}
311
if (max_ex_fn >= 0x80000004) {
312
#ifndef USE_CPU_FEATURES
313
// Extract brand string
314
do_cpuid(cpu_id, 0x80000002);
315
memcpy(brand_string, cpu_id, sizeof(cpu_id));
316
do_cpuid(cpu_id, 0x80000003);
317
memcpy(brand_string + 16, cpu_id, sizeof(cpu_id));
318
do_cpuid(cpu_id, 0x80000004);
319
memcpy(brand_string + 32, cpu_id, sizeof(cpu_id));
320
#endif
321
}
322
if (max_ex_fn >= 0x80000001) {
323
// Check for more features.
324
do_cpuid(cpu_id, 0x80000001);
325
if (cpu_id[2] & 1) bLAHFSAHF64 = true;
326
#ifndef USE_CPU_FEATURES
327
if ((cpu_id[2] >> 6) & 1) bSSE4A = true;
328
if ((cpu_id[2] >> 16) & 1) bFMA4 = true;
329
#endif
330
if ((cpu_id[2] >> 11) & 1) bXOP = true;
331
// CmpLegacy (bit 2) is deprecated.
332
if ((cpu_id[3] >> 29) & 1) bLongMode = true;
333
}
334
335
num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count;
336
337
if (max_ex_fn >= 0x80000008) {
338
// Get number of cores. This is a bit complicated. Following AMD manual here.
339
do_cpuid(cpu_id, 0x80000008);
340
int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF;
341
if (apic_id_core_id_size == 0) {
342
if (ht) {
343
// 0x0B is the preferred method on Core i series processors.
344
// Inspired by https://github.com/D-Programming-Language/druntime/blob/23b0d1f41e27638bda2813af55823b502195a58d/src/core/cpuid.d#L562.
345
bool hasLeafB = false;
346
if (vendor == VENDOR_INTEL && max_std_fn >= 0x0B) {
347
do_cpuidex(cpu_id, 0x0B, 0);
348
if (cpu_id[1] != 0) {
349
logical_cpu_count = cpu_id[1] & 0xFFFF;
350
do_cpuidex(cpu_id, 0x0B, 1);
351
int totalThreads = cpu_id[1] & 0xFFFF;
352
num_cores = totalThreads / logical_cpu_count;
353
hasLeafB = true;
354
}
355
}
356
// Old new mechanism for modern Intel CPUs.
357
if (!hasLeafB && vendor == VENDOR_INTEL) {
358
do_cpuid(cpu_id, 0x00000004);
359
int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1;
360
HTT = (cores_x_package < logical_cpu_count);
361
cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;
362
num_cores = (cores_x_package > 1) ? cores_x_package : num_cores;
363
logical_cpu_count /= cores_x_package;
364
}
365
}
366
} else {
367
// Use AMD's new method.
368
num_cores = (cpu_id[2] & 0xFF) + 1;
369
}
370
}
371
372
// The above only gets valid info for the active processor.
373
// Let's rely on OS APIs for accurate information, if available, below.
374
375
#if PPSSPP_PLATFORM(WINDOWS)
376
#if !PPSSPP_PLATFORM(UWP)
377
typedef BOOL (WINAPI *getLogicalProcessorInformationEx_f)(LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnedLength);
378
getLogicalProcessorInformationEx_f getLogicalProcessorInformationEx = nullptr;
379
HMODULE kernel32 = GetModuleHandle(L"kernel32.dll");
380
if (kernel32)
381
getLogicalProcessorInformationEx = (getLogicalProcessorInformationEx_f)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");
382
#else
383
void *getLogicalProcessorInformationEx = nullptr;
384
#endif
385
386
if (getLogicalProcessorInformationEx) {
387
#if !PPSSPP_PLATFORM(UWP)
388
DWORD len = 0;
389
getLogicalProcessorInformationEx(RelationAll, nullptr, &len);
390
auto processors = new uint8_t[len];
391
if (getLogicalProcessorInformationEx(RelationAll, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)processors, &len)) {
392
num_cores = 0;
393
logical_cpu_count = 0;
394
auto p = processors;
395
while (p < processors + len) {
396
const auto &processor = *(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)p;
397
if (processor.Relationship == RelationProcessorCore) {
398
num_cores++;
399
for (int j = 0; j < processor.Processor.GroupCount; ++j) {
400
const auto &mask = processor.Processor.GroupMask[j].Mask;
401
for (int i = 0; i < sizeof(mask) * 8; ++i) {
402
logical_cpu_count += (mask >> i) & 1;
403
}
404
}
405
}
406
p += processor.Size;
407
}
408
}
409
delete [] processors;
410
#endif
411
} else {
412
DWORD len = 0;
413
const DWORD sz = sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
414
GetLogicalProcessorInformation(nullptr, &len);
415
std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> processors;
416
processors.resize((len + sz - 1) / sz);
417
if (GetLogicalProcessorInformation(&processors[0], &len)) {
418
num_cores = 0;
419
logical_cpu_count = 0;
420
for (const auto &processor : processors) {
421
if (processor.Relationship == RelationProcessorCore) {
422
num_cores++;
423
for (int i = 0; i < sizeof(processor.ProcessorMask) * 8; ++i) {
424
logical_cpu_count += (processor.ProcessorMask >> i) & 1;
425
}
426
}
427
}
428
}
429
}
430
431
// This seems to be the count per core. Hopefully all cores are the same, but we counted each above.
432
logical_cpu_count /= std::max(num_cores, 1);
433
#elif PPSSPP_PLATFORM(LINUX)
434
if (File::Exists(Path("/sys/devices/system/cpu/present"))) {
435
// This may not count unplugged cores, but at least it's a best guess.
436
// Also, this assumes the CPU cores are heterogeneous (e.g. all cores could be active simultaneously.)
437
num_cores = 0;
438
logical_cpu_count = 0;
439
440
std::set<int> counted_cores;
441
auto present = ParseCPUList("/sys/devices/system/cpu/present");
442
for (int id : present) {
443
logical_cpu_count++;
444
445
if (counted_cores.count(id) == 0) {
446
num_cores++;
447
counted_cores.insert(id);
448
449
// Also count any thread siblings as counted.
450
auto threads = ParseCPUList(StringFromFormat("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", id));
451
for (int mark_id : threads) {
452
counted_cores.insert(mark_id);
453
}
454
}
455
}
456
}
457
458
// This seems to be the count per core. Hopefully all cores are the same, but we counted each above.
459
logical_cpu_count /= std::max(num_cores, 1);
460
#elif PPSSPP_PLATFORM(MAC)
461
int num = 0;
462
size_t sz = sizeof(num);
463
if (sysctlbyname("hw.physicalcpu_max", &num, &sz, nullptr, 0) == 0) {
464
num_cores = num;
465
sz = sizeof(num);
466
if (sysctlbyname("hw.logicalcpu_max", &num, &sz, nullptr, 0) == 0) {
467
logical_cpu_count = num / std::max(num_cores, 1);
468
}
469
}
470
#endif
471
if (logical_cpu_count <= 0)
472
logical_cpu_count = 1;
473
}
474
475
std::vector<std::string> CPUInfo::Features() {
476
std::vector<std::string> features;
477
478
struct Flag {
479
bool &flag;
480
const char *str;
481
};
482
const Flag list[] = {
483
{ bSSE, "SSE" },
484
{ bSSE2, "SSE2" },
485
{ bSSE3, "SSE3" },
486
{ bSSSE3, "SSSE3" },
487
{ bSSE4_1, "SSE4.1" },
488
{ bSSE4_2, "SSE4.2" },
489
{ bSSE4A, "SSE4A" },
490
{ HTT, "HTT" },
491
{ bAVX, "AVX" },
492
{ bAVX2, "AVX2" },
493
{ bFMA3, "FMA3" },
494
{ bFMA4, "FMA4" },
495
{ bAES, "AES" },
496
{ bSHA, "SHA" },
497
{ bXOP, "XOP" },
498
{ bRTM, "TSX" },
499
{ bF16C, "F16C" },
500
{ bBMI1, "BMI1" },
501
{ bBMI2, "BMI2" },
502
{ bPOPCNT, "POPCNT" },
503
{ bMOVBE, "MOVBE" },
504
{ bLZCNT, "LZCNT" },
505
{ bLongMode, "64-bit support" },
506
};
507
508
for (auto &item : list) {
509
if (item.flag) {
510
features.push_back(item.str);
511
}
512
}
513
514
return features;
515
}
516
517
// Turn the cpu info into a string we can show
518
std::string CPUInfo::Summarize() {
519
std::string sum;
520
if (num_cores == 1) {
521
sum = StringFromFormat("%s, %d core", cpu_string, num_cores);
522
} else {
523
sum = StringFromFormat("%s, %d cores", cpu_string, num_cores);
524
if (HTT)
525
sum += StringFromFormat(" (%i logical threads per physical core)", logical_cpu_count);
526
}
527
528
auto features = Features();
529
for (std::string &feature : features) {
530
sum += ", " + feature;
531
}
532
return sum;
533
}
534
535
#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
536
537
// Returns a short, static string naming the ABI this binary was compiled for.
// The choice is made entirely at compile time from the PPSSPP_ARCH() macros
// (and, on RISC-V, the compiler's float-ABI macros). Every configuration
// returns a valid string.
const char *GetCompilerABI() {
#if PPSSPP_ARCH(ARMV7)
	return "armeabi-v7a";
#elif PPSSPP_ARCH(ARM64)
	return "arm64";
#elif PPSSPP_ARCH(X86)
	return "x86";
#elif PPSSPP_ARCH(AMD64)
	return "x86-64";
#elif PPSSPP_ARCH(RISCV64)
	//https://github.com/riscv/riscv-toolchain-conventions#cc-preprocessor-definitions
	//https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md#abi-related-preprocessor-definitions
#if defined(__riscv_float_abi_single)
	return "lp64f";
#elif defined(__riscv_float_abi_double)
	return "lp64d";
#elif defined(__riscv_float_abi_quad)
	return "lp64q";
#elif defined(__riscv_float_abi_soft)
	return "lp64";
#else
	// No float-ABI macro defined by this toolchain: still return something
	// instead of falling off the end of a non-void function.
	return "riscv64";
#endif
#else
	return "other";
#endif
}
562
563