GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/topology.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU/APIC topology
 *
 * The APIC IDs describe the system topology in multiple domain levels.
 * The CPUID topology parser provides the information about which part of
 * the APIC ID is associated with the individual levels:
 *
 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
 *
 * The root space contains the package (socket) IDs.
 *
 * Levels which are not enumerated consume 0 bits of space, but
 * conceptually they are always represented. If e.g. only the CORE and
 * THREAD levels are enumerated, then the DIE, MODULE and TILE levels
 * have the same physical ID as the PACKAGE level.
 *
 * If SMT is not supported, then the THREAD domain is still used. It then
 * has the same physical ID as the CORE domain and is the only child of
 * the core domain.
 *
 * This allows a unified view of the system independent of the enumerated
 * domain levels without requiring any conditionals in the code.
 */
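
/*
 * Illustrative example (the widths below are hypothetical; the real
 * shift widths come from the CPUID topology parser): with 1 bit
 * enumerated for THREAD and 3 bits for CORE, APIC ID 0x1d (0b11101)
 * decomposes as
 *
 *	[PACKAGE 0b1][CORE 0b110][THREAD 0b1]
 *
 * i.e. package 1, core 6, thread 1 within that core.
 */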
#define pr_fmt(fmt) "CPU topo: " fmt
#include <linux/cpu.h>

#include <xen/xen.h>

#include <asm/apic.h>
#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/smp.h>

#include "cpu.h"

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);

/* Bitmap of physically present CPUs. */
DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;

/* Used for CPU number allocation and parallel CPU bringup */
u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };

/* Bitmaps to mark registered APICs at each topology domain */
static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;

/*
 * Keep track of assigned, disabled and rejected CPUs. nr_assigned_cpus
 * is preset to 1 as CPU #0 is reserved for the boot CPU.
 */
static struct {
	unsigned int nr_assigned_cpus;
	unsigned int nr_disabled_cpus;
	unsigned int nr_rejected_cpus;
	u32 boot_cpu_apic_id;
	u32 real_bsp_apic_id;
} topo_info __ro_after_init = {
	.nr_assigned_cpus = 1,
	.boot_cpu_apic_id = BAD_APICID,
	.real_bsp_apic_id = BAD_APICID,
};

#define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)

bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
	return phys_id == (u64)cpuid_to_apicid[cpu];
}

#ifdef CONFIG_SMP
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	if (!(apicid & (__max_threads_per_core - 1)))
		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
#else
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
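
/*
 * Illustrative example (hypothetical topology): with
 * __max_threads_per_core == 2 the low APIC ID bit selects the thread,
 * so APIC IDs 0x0, 0x2, 0x4, ... pass the mask check above and are
 * marked as primary threads, while 0x1, 0x3, 0x5, ... are not.
 */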

/*
 * Convert the APIC ID to a domain level ID by masking out the low bits
 * below the domain level @dom.
 */
static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
{
	if (dom == TOPO_SMT_DOMAIN)
		return apicid;
	return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
}
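
/*
 * Illustrative example (hypothetical shift widths): with
 * dom_shifts[TOPO_SMT_DOMAIN] == 1, topo_apicid(0x1d, TOPO_CORE_DOMAIN)
 * masks out the thread bit and yields 0x1c, the core level ID shared by
 * both threads of that core.
 */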

static int topo_lookup_cpuid(u32 apic_id)
{
	int i;

	/* CPU# to APICID mapping is persistent once it is established */
	for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
		if (cpuid_to_apicid[i] == apic_id)
			return i;
	}
	return -ENODEV;
}

static __init int topo_get_cpunr(u32 apic_id)
{
	int cpu = topo_lookup_cpuid(apic_id);

	if (cpu >= 0)
		return cpu;

	return topo_info.nr_assigned_cpus++;
}
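
/*
 * Illustrative note: topo_get_cpunr() hands out CPU numbers in firmware
 * enumeration order. E.g. if APIC IDs 0x10 and 0x14 are enumerated
 * after the boot CPU, they get CPU numbers 1 and 2 respectively; a
 * re-enumeration of 0x10 finds the existing mapping and returns 1 again.
 */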

static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
{
#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
	early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
	early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
	set_cpu_present(cpu, true);
}

static __init bool check_for_real_bsp(u32 apic_id)
{
	bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6;
	u64 msr;

	/*
	 * There is no real good way to detect whether this is a kdump
	 * kernel, but except for the Voyager SMP monstrosity, which is
	 * no longer supported, the real BSP APIC ID is the first one
	 * which is enumerated by firmware. That allows detecting whether
	 * the boot CPU is the real BSP. If it is not, then do not
	 * register the APIC because sending INIT to the real BSP would
	 * reset the whole system.
	 *
	 * The first APIC ID which is enumerated by firmware is detectable
	 * because the boot CPU APIC ID is registered before that without
	 * invoking this code.
	 */
	if (topo_info.real_bsp_apic_id != BAD_APICID)
		return false;

	/*
	 * Check whether the enumeration order is broken by evaluating the
	 * BSP bit in the APICBASE MSR. If the CPU does not have the
	 * APICBASE MSR then the BSP detection is not possible and the
	 * kernel must rely on the firmware enumeration order.
	 */
	if (has_apic_base) {
		rdmsrq(MSR_IA32_APICBASE, msr);
		is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
	}

	if (apic_id == topo_info.boot_cpu_apic_id) {
		/*
		 * If the boot CPU has the APIC BSP bit set then the
		 * firmware enumeration agrees. If the CPU does not
		 * have the APICBASE MSR then the only choice is to trust
		 * the enumeration order.
		 */
		if (is_bsp || !has_apic_base) {
			topo_info.real_bsp_apic_id = apic_id;
			return false;
		}
		/*
		 * If the boot APIC is enumerated first, but the APICBASE
		 * MSR does not have the BSP bit set, then there is no way
		 * to discover the real BSP here. Assume a crash kernel and
		 * limit the number of CPUs to 1 as an INIT to the real BSP
		 * would reset the machine.
		 */
		pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id);
		pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n");
		set_nr_cpu_ids(1);
		goto fwbug;
	}

	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n",
		topo_info.boot_cpu_apic_id, apic_id);

	if (is_bsp) {
		/*
		 * The boot CPU has the APIC BSP bit set. Use it and complain
		 * about the broken firmware enumeration.
		 */
		topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id;
		goto fwbug;
	}

	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");

	topo_info.real_bsp_apic_id = apic_id;
	return true;

fwbug:
	pr_warn(FW_BUG "APIC enumeration order not specification compliant\n");
	return false;
}

static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
				    unsigned long *map)
{
	unsigned int id, end, cnt = 0;

	/* Calculate the exclusive end */
	end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);

	/* Unfortunately there is no bitmap_weight_range() */
	for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
		cnt++;
	return cnt;
}
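
/*
 * Illustrative example (hypothetical shift widths): with
 * dom_shifts[TOPO_PKG_DOMAIN] == 4, a package spans 16 APIC IDs, so
 * topo_unit_count(0x10, TOPO_PKG_DOMAIN, map) counts the bits set in
 * @map within [0x10, 0x20), i.e. the registered units in the package
 * covering APIC IDs 0x10-0x1f.
 */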

static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	int cpu, dom;

	if (present) {
		set_bit(apic_id, phys_cpu_present_map);

		/*
		 * Double registration is valid in case of the boot CPU
		 * APIC because that is registered before the enumeration
		 * of the APICs via firmware parsers or VM guest
		 * mechanisms.
		 */
		if (apic_id == topo_info.boot_cpu_apic_id)
			cpu = 0;
		else
			cpu = topo_get_cpunr(apic_id);

		cpuid_to_apicid[cpu] = apic_id;
		topo_set_cpuids(cpu, apic_id, acpi_id);
	} else {
		u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);

		/*
		 * Check for present APICs in the same package when running
		 * on bare metal. Allow the bogosity in a guest.
		 */
		if (hypervisor_is_type(X86_HYPER_NATIVE) &&
		    topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
			pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
				     apic_id);
			topo_info.nr_rejected_cpus++;
			return;
		}

		topo_info.nr_disabled_cpus++;
	}

	/*
	 * Register present and possible CPUs in the domain
	 * maps. cpu_possible_map will be updated in
	 * topology_init_possible_cpus() after enumeration is done.
	 */
	for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
		set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}

/**
 * topology_register_apic - Register an APIC in early topology maps
 * @apic_id: The APIC ID to set up
 * @acpi_id: The ACPI ID associated with the APIC
 * @present: True if the corresponding CPU is present
 */
void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	if (apic_id >= MAX_LOCAL_APIC) {
		pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
		topo_info.nr_rejected_cpus++;
		return;
	}

	if (check_for_real_bsp(apic_id)) {
		topo_info.nr_rejected_cpus++;
		return;
	}

	/* CPU numbers exhausted? */
	if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
		pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
		topo_info.nr_rejected_cpus++;
		return;
	}

	topo_register_apic(apic_id, acpi_id, present);
}

/**
 * topology_register_boot_apic - Register the boot CPU APIC
 * @apic_id: The APIC ID to set up
 *
 * Separate so CPU #0 can be assigned
 */
void __init topology_register_boot_apic(u32 apic_id)
{
	WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);

	topo_info.boot_cpu_apic_id = apic_id;
	topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
}

/**
 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
 * @apicid: The APIC ID for which to lookup the logical ID
 * @at_level: The topology domain level to use
 *
 * @apicid must be a full APIC ID, not the normalized variant. It's valid for
 * all bits below the domain level specified by @at_level to be clear. So both
 * real APIC IDs and backshifted normalized APIC IDs work correctly.
 *
 * Returns:
 *  - >= 0: The requested logical ID
 *  - -ERANGE: @apicid is out of range
 *  - -ENODEV: @apicid is not registered
 */
int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
{
	/* Remove the bits below @at_level to get the proper level ID of @apicid */
	unsigned int lvlid = topo_apicid(apicid, at_level);

	if (lvlid >= MAX_LOCAL_APIC)
		return -ERANGE;
	if (!test_bit(lvlid, apic_maps[at_level].map))
		return -ENODEV;
	/* Get the number of set bits before @lvlid. */
	return bitmap_weight(apic_maps[at_level].map, lvlid);
}
EXPORT_SYMBOL_GPL(topology_get_logical_id);
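
/*
 * Illustrative example (assuming a hypothetical 5 APIC ID bits below
 * the package level): if the TOPO_PKG_DOMAIN map has bits 0x00 and
 * 0x20 set, then topology_get_logical_id(0x23, TOPO_PKG_DOMAIN) masks
 * the APIC ID down to 0x20 and returns bitmap_weight(map, 0x20) == 1,
 * i.e. the second package gets logical ID 1.
 */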

/**
 * topology_unit_count - Retrieve the count of specified units at a given topology domain level
 * @apicid: The APIC ID which specifies the search range
 * @which_units: The domain level specifying the units to count
 * @at_level: The domain level at which @which_units have to be counted
 *
 * This returns the number of possible units according to the enumerated
 * information.
 *
 * E.g. topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN)
 * counts the number of possible cores in the package to which @apicid
 * belongs.
 *
 * @at_level must obviously be greater than @which_units to produce useful
 * results. If @at_level is equal to @which_units the result is
 * unsurprisingly 1. If @at_level is less than @which_units the result
 * is by definition undefined and the function returns 0.
 */
unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
				 enum x86_topology_domains at_level)
{
	/* Remove the bits below @at_level to get the proper level ID of @apicid */
	unsigned int lvlid = topo_apicid(apicid, at_level);

	if (lvlid >= MAX_LOCAL_APIC)
		return 0;
	if (!test_bit(lvlid, apic_maps[at_level].map))
		return 0;
	if (which_units > at_level)
		return 0;
	if (which_units == at_level)
		return 1;
	return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
}
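
/*
 * Illustrative usage: topology_unit_count(apicid, TOPO_SMT_DOMAIN,
 * TOPO_CORE_DOMAIN) yields the number of possible threads in the core
 * to which @apicid belongs, i.e. 2 on a typical SMT2 system.
 */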

#ifdef CONFIG_SMP
int topology_get_primary_thread(unsigned int cpu)
{
	u32 apic_id = cpuid_to_apicid[cpu];

	/*
	 * Get the core domain level APIC ID, which belongs to the primary
	 * thread, and return the CPU number assigned to it.
	 */
	return topo_lookup_cpuid(topo_apicid(apic_id, TOPO_CORE_DOMAIN));
}
#endif

#ifdef CONFIG_ACPI_HOTPLUG_CPU
/**
 * topology_hotplug_apic - Handle a physically hotplugged APIC after boot
 * @apic_id: The APIC ID to set up
 * @acpi_id: The ACPI ID associated with the APIC
 */
int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
{
	int cpu;

	if (apic_id >= MAX_LOCAL_APIC)
		return -EINVAL;

	/* Reject if the APIC ID was not registered during enumeration. */
	if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
		return -ENODEV;

	cpu = topo_lookup_cpuid(apic_id);
	if (cpu < 0)
		return -ENOSPC;

	set_bit(apic_id, phys_cpu_present_map);
	topo_set_cpuids(cpu, apic_id, acpi_id);
	cpu_mark_primary_thread(cpu, apic_id);
	return cpu;
}

/**
 * topology_hotunplug_apic - Remove a physically hotplugged APIC after boot
 * @cpu: The CPU number for which the APIC ID is removed
 */
void topology_hotunplug_apic(unsigned int cpu)
{
	u32 apic_id = cpuid_to_apicid[cpu];

	if (apic_id == BAD_APICID)
		return;

	per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
	clear_bit(apic_id, phys_cpu_present_map);
	set_cpu_present(cpu, false);
}
#endif

#ifdef CONFIG_X86_LOCAL_APIC
static unsigned int max_possible_cpus __initdata = NR_CPUS;

/**
 * topology_apply_cmdline_limits_early - Apply topology command line limits early
 *
 * Ensure that command line limits are in effect before firmware parsing
 * takes place.
 */
void __init topology_apply_cmdline_limits_early(void)
{
	unsigned int possible = nr_cpu_ids;

	/* 'maxcpus=0' 'nosmp' 'nolapic' */
	if (!setup_max_cpus || apic_is_disabled)
		possible = 1;

	/* 'possible_cpus=N' */
	possible = min_t(unsigned int, max_possible_cpus, possible);

	if (possible < nr_cpu_ids) {
		pr_info("Limiting to %u possible CPUs\n", possible);
		set_nr_cpu_ids(possible);
	}
}

static __init bool restrict_to_up(void)
{
	if (!smp_found_config)
		return true;
	/*
	 * XEN PV is special as it does not advertise the local APIC
	 * properly, but provides a fake topology for it so that the
	 * infrastructure works. So don't apply the APIC related
	 * restrictions here.
	 */
	if (xen_pv_domain())
		return false;

	return apic_is_disabled;
}

void __init topology_init_possible_cpus(void)
{
	unsigned int assigned = topo_info.nr_assigned_cpus;
	unsigned int disabled = topo_info.nr_disabled_cpus;
	unsigned int cnta, cntb, cpu, allowed = 1;
	unsigned int total = assigned + disabled;
	u32 apicid, firstid;

	/*
	 * If there was no APIC registered, then fake one so that the
	 * topology bitmap is populated. That ensures that the code below
	 * is valid and the various query interfaces can be used
	 * unconditionally. This does not affect the actual APIC code in
	 * any way because either the local APIC address has not been
	 * registered or the local APIC was disabled on the command line.
	 */
	if (topo_info.boot_cpu_apic_id == BAD_APICID)
		topology_register_boot_apic(0);

	if (!restrict_to_up()) {
		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
			disabled += assigned - nr_cpu_ids;
			assigned = nr_cpu_ids;
		}
		allowed = min_t(unsigned int, total, nr_cpu_ids);
	}

	if (total > allowed)
		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);

	assigned = min_t(unsigned int, allowed, assigned);
	disabled = allowed - assigned;

	topo_info.nr_assigned_cpus = assigned;
	topo_info.nr_disabled_cpus = disabled;

	total_cpus = allowed;
	set_nr_cpu_ids(allowed);

	cnta = domain_weight(TOPO_PKG_DOMAIN);
	cntb = domain_weight(TOPO_DIE_DOMAIN);
	__max_logical_packages = cnta;
	__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));

	pr_info("Max. logical packages: %3u\n", cnta);
	pr_info("Max. logical dies: %3u\n", cntb);
	pr_info("Max. dies per package: %3u\n", __max_dies_per_package);

	cnta = domain_weight(TOPO_CORE_DOMAIN);
	cntb = domain_weight(TOPO_SMT_DOMAIN);
	/*
	 * Can't use the order delta here as order(cnta) can be equal to
	 * order(cntb) even if cnta != cntb.
	 */
	__max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
	pr_info("Max. threads per core: %3u\n", __max_threads_per_core);

	firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
	__num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. cores per package: %3u\n", __num_cores_per_package);
	__num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. threads per package: %3u\n", __num_threads_per_package);

	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
	if (topo_info.nr_rejected_cpus)
		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);

	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	/* Assign CPU numbers to non-present CPUs */
	for (apicid = 0; disabled; disabled--, apicid++) {
		apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
					      MAX_LOCAL_APIC, apicid);
		if (apicid >= MAX_LOCAL_APIC)
			break;
		cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
	}

	for (cpu = 0; cpu < allowed; cpu++) {
		apicid = cpuid_to_apicid[cpu];

		set_cpu_possible(cpu, true);

		if (apicid == BAD_APICID)
			continue;

		cpu_mark_primary_thread(cpu, apicid);
		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
	}
}
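
/*
 * Illustrative example (hypothetical counts): with 2 packages and 4
 * dies registered, get_count_order(4) - get_count_order(2) == 2 - 1,
 * so __max_dies_per_package == 1U << 1 == 2. For threads, with 6 cores
 * and 12 SMT units, __max_threads_per_core == DIV_ROUND_UP(12, 6) == 2.
 * The order delta would be wrong there: e.g. 6 cores and 8 threads
 * share the same count order, which would yield 1 instead of 2.
 */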

/*
 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
 */
void __init topology_reset_possible_cpus_up(void)
{
	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
	if (topo_info.boot_cpu_apic_id != BAD_APICID)
		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
}

static int __init setup_possible_cpus(char *str)
{
	get_option(&str, &max_possible_cpus);
	return 0;
}
early_param("possible_cpus", setup_possible_cpus);
#endif