GitHub Repository: torvalds/linux
Path: blob/master/drivers/cpufreq/acpi-cpufreq.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 * Copyright (C) 2001, 2002 Andy Grover <[email protected]>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <[email protected]>
 * Copyright (C) 2002 - 2004 Dominik Brodowski <[email protected]>
 * Copyright (C) 2006 Denis Sadykov <[email protected]>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/string_helpers.h>
#include <linux/platform_device.h>

#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)
#define HYGON_MSR_RANGE		(0x7)
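/*
 * These masks select the P-state field that extract_msr() and the
 * cpu_freq_read/write helpers below operate on: the Intel PERF_CTL/
 * PERF_STATUS layout keeps the target state in the low 16 bits, while the
 * AMD and Hygon PERF_CTL layout uses only the low 3 bits for the P-state
 * number.
 */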

struct acpi_cpufreq_data {
	unsigned int resume;
	unsigned int cpu_feature;
	unsigned int acpi_perf_cpu;
	cpumask_var_t freqdomain_cpus;
	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
};

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance __percpu *acpi_perf_data;

static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu);
}

static struct cpufreq_driver acpi_cpufreq_driver;

static unsigned int acpi_pstate_strict;
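/*
 * acpi_pstate_strict is a module parameter (see module_param() at the end of
 * this file, mode 0644): when non-zero, every P-state transition is verified
 * by re-reading the hardware in check_freqs(). It can typically be changed
 * at runtime through /sys/module/acpi_cpufreq/parameters/acpi_pstate_strict
 * or passed as acpi_pstate_strict=1 when the module is loaded.
 */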

static bool boost_state(unsigned int cpu)
{
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
	case X86_VENDOR_CENTAUR:
	case X86_VENDOR_ZHAOXIN:
		rdmsrq_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &msr);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_HYGON:
	case X86_VENDOR_AMD:
		rdmsrq_on_cpu(cpu, MSR_K7_HWCR, &msr);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

static int boost_set_msr(bool enable)
{
	u32 msr_addr;
	u64 msr_mask, val;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
	case X86_VENDOR_CENTAUR:
	case X86_VENDOR_ZHAOXIN:
		msr_addr = MSR_IA32_MISC_ENABLE;
		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
		break;
	case X86_VENDOR_HYGON:
	case X86_VENDOR_AMD:
		msr_addr = MSR_K7_HWCR;
		msr_mask = MSR_K7_HWCR_CPB_DIS;
		break;
	default:
		return -EINVAL;
	}

	rdmsrq(msr_addr, val);

	if (enable)
		val &= ~msr_mask;
	else
		val |= msr_mask;

	wrmsrq(msr_addr, val);
	return 0;
}

static void boost_set_msr_each(void *p_en)
{
	bool enable = (bool) p_en;

	boost_set_msr(enable);
}

static int set_boost(struct cpufreq_policy *policy, int val)
{
	on_each_cpu_mask(policy->cpus, boost_set_msr_each,
			 (void *)(long)val, 1);
	pr_debug("CPU %*pbl: Core Boosting %s.\n",
		 cpumask_pr_args(policy->cpus), str_enabled_disabled(val));

	return 0;
}
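/*
 * The boost-control MSRs above are per-CPU, so set_boost() runs
 * boost_set_msr_each() on every CPU in the policy via on_each_cpu_mask();
 * the final argument makes the cross-call wait for completion.
 * acpi_cpufreq_boost_init() below installs this as the driver's
 * ->set_boost() callback when the CPU advertises CPB or IDA.
 */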

static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	if (unlikely(!data))
		return -ENODEV;

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	int ret;
	unsigned int val = 0;

	if (!acpi_cpufreq_driver.set_boost)
		return -EINVAL;

	ret = kstrtouint(buf, 10, &val);
	if (ret || val > 1)
		return -EINVAL;

	cpus_read_lock();
	set_boost(policy, val);
	cpus_read_unlock();

	return count;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
}

cpufreq_freq_attr_rw(cpb);
#endif
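/*
 * With CONFIG_X86_ACPI_CPUFREQ_CPB, the "cpb" attribute above is typically
 * exposed per policy (for example as
 * /sys/devices/system/cpu/cpu0/cpufreq/cpb on a common sysfs layout);
 * writing 0 or 1 toggles Core Performance Boost, with a system-wide effect
 * as noted in acpi_cpufreq_probe().
 */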

static int check_est_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_EST);
}

static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
}

static unsigned extract_io(struct cpufreq_policy *policy, u32 value)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	int i;

	perf = to_perf_data(data);

	for (i = 0; i < perf->state_count; i++) {
		if (value == perf->states[i].status)
			return policy->freq_table[i].frequency;
	}
	return 0;
}

static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct cpufreq_frequency_table *pos;
	struct acpi_processor_performance *perf;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		msr &= HYGON_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	perf = to_perf_data(data);

	cpufreq_for_each_entry(pos, policy->freq_table)
		if (msr == perf->states[pos->driver_data].status)
			return pos->frequency;
	return policy->freq_table[0].frequency;
}

static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		return extract_msr(policy, val);
	case SYSTEM_IO_CAPABLE:
		return extract_io(policy, val);
	default:
		return 0;
	}
}
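/*
 * Worked example with hypothetical values: on an Intel MSR-capable system a
 * raw PERF_STATUS reading of 0x06002a1c is masked with INTEL_MSR_RANGE down
 * to 0x2a1c; if perf->states[3].status == 0x2a1c, extract_msr() returns the
 * frequency of the freq_table entry whose driver_data is 3. A value that
 * matches no state falls back to freq_table[0].frequency.
 */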

static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
{
	u32 val, dummy __always_unused;

	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
	return val;
}

static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
{
	u32 lo, hi;

	rdmsr(MSR_IA32_PERF_CTL, lo, hi);
	lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
}

static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
{
	u32 val, dummy __always_unused;

	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
	return val;
}

static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
{
	wrmsr(MSR_AMD_PERF_CTL, val, 0);
}

static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
{
	u32 val;

	acpi_os_read_port(reg->address, &val, reg->bit_width);
	return val;
}

static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
{
	acpi_os_write_port(reg->address, val, reg->bit_width);
}

struct drv_cmd {
	struct acpi_pct_register *reg;
	u32 val;
	union {
		void (*write)(struct acpi_pct_register *reg, u32 val);
		u32 (*read)(struct acpi_pct_register *reg);
	} func;
};

/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;

	cmd->val = cmd->func.read(cmd->reg);
}

static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.func.read = data->cpu_freq_read,
	};
	int err;

	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
	return cmd.val;
}

static void do_drv_write(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;

	cmd->func.write(cmd->reg, cmd->val);
}

static void drv_write(struct acpi_cpufreq_data *data,
		      const struct cpumask *mask, u32 val)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.val = val,
		.func.write = data->cpu_freq_write,
	};

	on_each_cpu_mask(mask, do_drv_write, &cmd, true);
}
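/*
 * Read vs. write asymmetry: drv_read() only needs the value from one CPU in
 * the coordination domain, so it uses smp_call_function_any(), whereas
 * drv_write() must program every CPU in @mask and therefore runs
 * do_drv_write() on each of them (including the local CPU if it is in the
 * mask) via on_each_cpu_mask().
 */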

static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
{
	u32 val;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	val = drv_read(data, mask);

	pr_debug("%s = %u\n", __func__, val);

	return val;
}

static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
	unsigned int freq;
	unsigned int cached_freq;

	pr_debug("%s (%d)\n", __func__, cpu);

	policy = cpufreq_cpu_get_raw(cpu);
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
	if (unlikely(!data || !policy->freq_table))
		return 0;

	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

	pr_debug("cur freq = %u\n", freq);

	return freq;
}

static unsigned int check_freqs(struct cpufreq_policy *policy,
				const struct cpumask *mask, unsigned int freq)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	unsigned int cur_freq;
	unsigned int i;

	for (i = 0; i < 100; i++) {
		cur_freq = extract_freq(policy, get_cur_val(mask, data));
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}
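/*
 * check_freqs() polls the hardware up to 100 times with a 10 us delay, i.e.
 * it waits at most roughly 1 ms for the requested frequency to become
 * visible. It is only consulted when acpi_pstate_strict is set (see
 * acpi_cpufreq_target() below).
 */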

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
			       unsigned int index)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	const struct cpumask *mask;
	unsigned int next_perf_state = 0; /* Index into perf table */
	int result = 0;

	if (unlikely(!data)) {
		return -ENODEV;
	}

	perf = to_perf_data(data);
	next_perf_state = policy->freq_table[index].driver_data;
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume)) {
			pr_debug("Called after resume, resetting to P%d\n",
				 next_perf_state);
			data->resume = 0;
		} else {
			pr_debug("Already at target state (P%d)\n",
				 next_perf_state);
			return 0;
		}
	}

	/*
	 * The core won't allow CPUs to go away until the governor has been
	 * stopped, so we can rely on the stability of policy->cpus.
	 */
	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
		cpumask_of(policy->cpu) : policy->cpus;

	drv_write(data, mask, perf->states[next_perf_state].control);

	if (acpi_pstate_strict) {
		if (!check_freqs(policy, mask,
				 policy->freq_table[index].frequency)) {
			pr_debug("%s (%d)\n", __func__, policy->cpu);
			result = -EAGAIN;
		}
	}

	if (!result)
		perf->state = next_perf_state;

	return result;
}

static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
					     unsigned int target_freq)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	struct cpufreq_frequency_table *entry;
	unsigned int next_perf_state, next_freq, index;

	/*
	 * Find the closest frequency above target_freq.
	 */
	if (policy->cached_target_freq == target_freq)
		index = policy->cached_resolved_idx;
	else
		index = cpufreq_table_find_index_dl(policy, target_freq,
						    false);

	entry = &policy->freq_table[index];
	next_freq = entry->frequency;
	next_perf_state = entry->driver_data;

	perf = to_perf_data(data);
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume))
			data->resume = 0;
		else
			return next_freq;
	}

	data->cpu_freq_write(&perf->control_register,
			     perf->states[next_perf_state].control);
	perf->state = next_perf_state;
	return next_freq;
}
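/*
 * Unlike acpi_cpufreq_target(), the fast-switch path above writes only the
 * local CPU's control register and skips the acpi_pstate_strict
 * verification, which is why acpi_cpufreq_cpu_init() clears
 * fast_switch_possible for strict mode and for shared policies that are not
 * CPUFREQ_SHARED_TYPE_ANY.
 */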

static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
	struct acpi_processor_performance *perf;

	perf = to_perf_data(data);
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
		unsigned long freqn = perf->states[0].core_frequency * 1000;

		for (i = 0; i < (perf->state_count-1); i++) {
			freq = freqn;
			freqn = perf->states[i+1].core_frequency * 1000;
			if ((2 * cpu_khz) > (freqn + freq)) {
				perf->state = i;
				return freq;
			}
		}
		perf->state = perf->state_count-1;
		return freqn;
	} else {
		/* assume CPU is at P0... */
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
}
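/*
 * Example with hypothetical table values: states of 2400 MHz and 1800 MHz
 * give freq = 2400000 and freqn = 1800000 (kHz); with cpu_khz = 2150000 the
 * test 2 * 2150000 > 2400000 + 1800000 holds, so the 2400 MHz state is
 * chosen, i.e. the state whose frequency is closest to the measured TSC
 * frequency wins.
 */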

static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

static int cpufreq_boost_down_prep(unsigned int cpu)
{
	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting.
	 */
	return boost_set_msr(1);
}

/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
static int __init acpi_cpufreq_early_init(void)
{
	unsigned int i;
	pr_debug("%s\n", __func__);

	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
		pr_debug("Memory allocation error for acpi_perf_data.\n");
		return -ENOMEM;
	}
	for_each_possible_cpu(i) {
		if (!zalloc_cpumask_var_node(
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}

	/* Do initialization in ACPI core */
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
}

#ifdef CONFIG_SMP
/*
 * Some BIOSes do SW_ANY coordination internally, either set up in hardware
 * or handled by the BIOS firmware, and do not inform the OS about it. If it
 * is not detected, this has the side effect of making the CPU run at a
 * different speed than the OS intended. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

static int sw_any_bug_found(const struct dmi_system_id *d)
{
	bios_with_sw_any_bug = 1;
	return 0;
}

static const struct dmi_system_id sw_any_bug_dmi_table[] = {
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
	/* Intel Xeon Processor 7100 Series Specification Update
	 * https://www.intel.com/Assets/PDF/specupdate/314554.pdf
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
	 * Both Processor Cores to Lock Up. */
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
		    (c->x86_stepping == 8)) {
			pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
			return -ENODEV;
		}
	}
	return 0;
}
#endif

#ifdef CONFIG_ACPI_CPPC_LIB
/*
 * get_max_boost_ratio: Computes the max_boost_ratio as the ratio
 * between the highest_perf and the nominal_perf.
 *
 * Returns the max_boost_ratio for @cpu. Returns the CPPC nominal
 * frequency via @nominal_freq if it is a non-NULL pointer.
 */
static u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq)
{
	struct cppc_perf_caps perf_caps;
	u64 highest_perf, nominal_perf;
	int ret;

	if (acpi_pstate_strict)
		return 0;

	ret = cppc_get_perf_caps(cpu, &perf_caps);
	if (ret) {
		pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
			 cpu, ret);
		return 0;
	}

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		ret = amd_get_boost_ratio_numerator(cpu, &highest_perf);
		if (ret) {
			pr_debug("CPU%d: Unable to get boost ratio numerator (%d)\n",
				 cpu, ret);
			return 0;
		}
	} else {
		highest_perf = perf_caps.highest_perf;
	}

	nominal_perf = perf_caps.nominal_perf;

	if (nominal_freq)
		*nominal_freq = perf_caps.nominal_freq * 1000;

	if (!highest_perf || !nominal_perf) {
		pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
		return 0;
	}

	if (highest_perf < nominal_perf) {
		pr_debug("CPU%d: nominal performance above highest\n", cpu);
		return 0;
	}

	return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
}

#else
static inline u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq)
{
	return 0;
}
#endif
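/*
 * Example of the ratio arithmetic with hypothetical CPPC values: with
 * highest_perf = 360 and nominal_perf = 240, get_max_boost_ratio() returns
 * (360 << SCHED_CAPACITY_SHIFT) / 240 = 1536. acpi_cpufreq_cpu_init() then
 * reports cpuinfo.max_freq = nominal_freq * 1536 >> SCHED_CAPACITY_SHIFT,
 * i.e. 1.5 times the nominal frequency.
 */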

static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	struct cpufreq_frequency_table *freq_table;
	struct acpi_processor_performance *perf;
	struct acpi_cpufreq_data *data;
	unsigned int cpu = policy->cpu;
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	u64 max_boost_ratio, nominal_freq = 0;
	unsigned int valid_states = 0;
	unsigned int result = 0;
	unsigned int i;
#ifdef CONFIG_SMP
	static int blacklisted;
#endif

	pr_debug("%s\n", __func__);

#ifdef CONFIG_SMP
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
#endif

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

	perf = per_cpu_ptr(acpi_perf_data, cpu);
	data->acpi_perf_cpu = cpu;
	policy->driver_data = data;

	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

	result = acpi_processor_register_performance(perf, cpu);
	if (result)
		goto err_free_mask;

	policy->shared_type = perf->shared_type;

	/*
	 * Will let policy->cpus know about dependency only when software
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
	}
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
	}

	if (check_amd_hwpstate_cpu(cpu) && boot_cpu_data.x86 < 0x19 &&
	    !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once("overriding BIOS provided _PSD data\n");
	}
#endif
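	/*
	 * On AMD hardware-P-state systems before family 0x19 the
	 * BIOS-provided _PSD dependency information above is overridden:
	 * each CPU gets its own policy (CPUFREQ_SHARED_TYPE_HW), while
	 * freqdomain_cpus still reports the sibling mask through sysfs.
	 */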

	/* capability check */
	if (perf->state_count <= 1) {
		pr_debug("No P-States\n");
		result = -ENODEV;
		goto err_unreg;
	}

	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
		pr_debug("SYSTEM IO addr space\n");
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		data->cpu_freq_read = cpu_freq_read_io;
		data->cpu_freq_write = cpu_freq_write_io;
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		pr_debug("HARDWARE addr space\n");
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			data->cpu_freq_read = cpu_freq_read_intel;
			data->cpu_freq_write = cpu_freq_write_intel;
			break;
		}
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			data->cpu_freq_read = cpu_freq_read_amd;
			data->cpu_freq_write = cpu_freq_write_amd;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
	default:
		pr_debug("Unknown addr space %d\n",
			 (u32) (perf->control_register.space_id));
		result = -ENODEV;
		goto err_unreg;
	}

	freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
			     GFP_KERNEL);
	if (!freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
	for (i = 0; i < perf->state_count; i++) {
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
	}

	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
		pr_info_once("P-state transition latency capped at 20 uS\n");
	}

	/* table init */
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
		    freq_table[valid_states-1].frequency / 1000)
			continue;

		freq_table[valid_states].driver_data = i;
		freq_table[valid_states].frequency =
		    perf->states[i].core_frequency * 1000;
		valid_states++;
	}
	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
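	/*
	 * At this point freq_table holds one entry per usable P-state in
	 * descending frequency order: the loop above drops any state whose
	 * frequency is not lower than the previously accepted entry, and
	 * driver_data records the index back into perf->states[] for use by
	 * extract_msr() and acpi_cpufreq_target().
	 */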

	max_boost_ratio = get_max_boost_ratio(cpu, &nominal_freq);
	if (max_boost_ratio) {
		unsigned int freq = nominal_freq;

		/*
		 * The loop above sorts the freq_table entries in descending
		 * order. If ACPI CPPC has not advertised the nominal
		 * frequency (this is possible in CPPC revisions prior to 3),
		 * then use the first entry in the pstate table as a proxy for
		 * the nominal frequency.
		 */
		if (!freq)
			freq = freq_table[0].frequency;

		policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
	} else {
		/*
		 * If the maximum "boost" frequency is unknown, ask the arch
		 * scale-invariance code to use the "nominal" performance for
		 * CPU utilization scaling so as to prevent the schedutil
		 * governor from selecting inadequate CPU frequencies.
		 */
		arch_set_max_freq_ratio(true);
	}

	policy->freq_table = freq_table;
	perf->state = 0;

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
		break;
	default:
		break;
	}

	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
	for (i = 0; i < perf->state_count; i++)
		pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
			 (i == perf->state ? '*' : ' '), i,
			 (u32) perf->states[i].core_frequency,
			 (u32) perf->states[i].power,
			 (u32) perf->states[i].transition_latency);

	/*
	 * The first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;

	policy->fast_switch_possible = !acpi_pstate_strict &&
		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);

	if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
		pr_warn(FW_WARN "P-state 0 is not max freq\n");

	if (acpi_cpufreq_driver.set_boost) {
		if (policy->boost_supported) {
			/*
			 * The firmware may have altered boost state while the
			 * CPU was offline (for example during a suspend-resume
			 * cycle).
			 */
			if (policy->boost_enabled != boost_state(cpu))
				set_boost(policy, policy->boost_enabled);
		} else {
			policy->boost_supported = true;
		}
	}

	return result;

err_unreg:
	acpi_processor_unregister_performance(cpu);
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
err_free:
	kfree(data);
	policy->driver_data = NULL;

	return result;
}

static void acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	pr_debug("%s\n", __func__);

	cpufreq_boost_down_prep(policy->cpu);
	policy->fast_switch_possible = false;
	policy->driver_data = NULL;
	acpi_processor_unregister_performance(data->acpi_perf_cpu);
	free_cpumask_var(data->freqdomain_cpus);
	kfree(policy->freq_table);
	kfree(data);
}

static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	pr_debug("%s\n", __func__);

	data->resume = 1;

	return 0;
}

static struct freq_attr *acpi_cpufreq_attr[] = {
	&freqdomain_cpus,
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
	.verify		= cpufreq_generic_frequency_table_verify,
	.target_index	= acpi_cpufreq_target,
	.fast_switch	= acpi_cpufreq_fast_switch,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
};

static void __init acpi_cpufreq_boost_init(void)
{
	if (!(boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA))) {
		pr_debug("Boost capabilities not present in the processor\n");
		return;
	}

	acpi_cpufreq_driver.set_boost = set_boost;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);
}

static int __init acpi_cpufreq_probe(struct platform_device *pdev)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -ENODEV;

	pr_debug("%s\n", __func__);

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/*
	 * This is a sysfs file with a strange name and even stranger
	 * semantics: it is instantiated per CPU, but has a system-global
	 * effect. Let's enable it only on AMD CPUs for compatibility reasons
	 * and only if configured. This is considered legacy code, which will
	 * probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **attr;

		pr_debug("CPB unsupported, do not expose it\n");

		for (attr = acpi_cpufreq_attr; *attr; attr++)
			if (*attr == &cpb) {
				*attr = NULL;
				break;
			}
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret) {
		free_acpi_perf_data();
	}
	return ret;
}

static void acpi_cpufreq_remove(struct platform_device *pdev)
{
	pr_debug("%s\n", __func__);

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}

static struct platform_driver acpi_cpufreq_platdrv = {
	.driver = {
		.name	= "acpi-cpufreq",
	},
	.remove		= acpi_cpufreq_remove,
};

static int __init acpi_cpufreq_init(void)
{
	return platform_driver_probe(&acpi_cpufreq_platdrv, acpi_cpufreq_probe);
}

static void __exit acpi_cpufreq_exit(void)
{
	platform_driver_unregister(&acpi_cpufreq_platdrv);
}

module_param(acpi_pstate_strict, uint, 0644);
MODULE_PARM_DESC(acpi_pstate_strict,
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

MODULE_ALIAS("platform:acpi-cpufreq");