GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/coco/sev/core.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* AMD Memory Encryption Support
4
*
5
* Copyright (C) 2019 SUSE
6
*
7
* Author: Joerg Roedel <[email protected]>
8
*/
9
10
#define pr_fmt(fmt) "SEV: " fmt
11
12
#include <linux/sched/debug.h> /* For show_regs() */
13
#include <linux/percpu-defs.h>
14
#include <linux/cc_platform.h>
15
#include <linux/printk.h>
16
#include <linux/mm_types.h>
17
#include <linux/set_memory.h>
18
#include <linux/memblock.h>
19
#include <linux/kernel.h>
20
#include <linux/mm.h>
21
#include <linux/cpumask.h>
22
#include <linux/efi.h>
23
#include <linux/platform_device.h>
24
#include <linux/io.h>
25
#include <linux/psp-sev.h>
26
#include <linux/dmi.h>
27
#include <uapi/linux/sev-guest.h>
28
#include <crypto/gcm.h>
29
30
#include <asm/init.h>
31
#include <asm/cpu_entry_area.h>
32
#include <asm/stacktrace.h>
33
#include <asm/sev.h>
34
#include <asm/sev-internal.h>
35
#include <asm/insn-eval.h>
36
#include <asm/fpu/xcr.h>
37
#include <asm/processor.h>
38
#include <asm/realmode.h>
39
#include <asm/setup.h>
40
#include <asm/traps.h>
41
#include <asm/svm.h>
42
#include <asm/smp.h>
43
#include <asm/cpu.h>
44
#include <asm/apic.h>
45
#include <asm/cpuid/api.h>
46
#include <asm/cmdline.h>
47
#include <asm/msr.h>
48
49
/* Bitmap of SEV features supported by the hypervisor */
50
u64 sev_hv_features __ro_after_init;
51
SYM_PIC_ALIAS(sev_hv_features);
52
53
/* Secrets page physical address from the CC blob */
54
u64 sev_secrets_pa __ro_after_init;
55
SYM_PIC_ALIAS(sev_secrets_pa);
56
57
/* For early boot SVSM communication */
58
struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
59
SYM_PIC_ALIAS(boot_svsm_ca_page);
60
61
/*
62
* SVSM related information:
63
* During boot, the page tables are set up as identity mapped and later
64
* changed to use kernel virtual addresses. Maintain separate virtual and
65
* physical addresses for the CAA to allow SVSM functions to be used during
66
* early boot, both with identity mapped virtual addresses and proper kernel
67
* virtual addresses.
68
*/
69
u64 boot_svsm_caa_pa __ro_after_init;
70
SYM_PIC_ALIAS(boot_svsm_caa_pa);
71
72
DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
73
DEFINE_PER_CPU(u64, svsm_caa_pa);
74
75
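/*
 * Return the SVSM calling area (CA) for the current context: once the
 * per-CPU calling areas are in use (sev_cfg.use_cas), return the per-CPU
 * CA, otherwise fall back to the boot CA page via a RIP-relative
 * reference so the helper also works during early, identity-mapped boot.
 */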
static inline struct svsm_ca *svsm_get_caa(void)
76
{
77
if (sev_cfg.use_cas)
78
return this_cpu_read(svsm_caa);
79
else
80
return rip_rel_ptr(&boot_svsm_ca_page);
81
}
82
83
static inline u64 svsm_get_caa_pa(void)
84
{
85
if (sev_cfg.use_cas)
86
return this_cpu_read(svsm_caa_pa);
87
else
88
return boot_svsm_caa_pa;
89
}
90
91
/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
92
#define AP_INIT_CS_LIMIT 0xffff
93
#define AP_INIT_DS_LIMIT 0xffff
94
#define AP_INIT_LDTR_LIMIT 0xffff
95
#define AP_INIT_GDTR_LIMIT 0xffff
96
#define AP_INIT_IDTR_LIMIT 0xffff
97
#define AP_INIT_TR_LIMIT 0xffff
98
#define AP_INIT_RFLAGS_DEFAULT 0x2
99
#define AP_INIT_DR6_DEFAULT 0xffff0ff0
100
#define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
101
#define AP_INIT_XCR0_DEFAULT 0x1
102
#define AP_INIT_X87_FTW_DEFAULT 0x5555
103
#define AP_INIT_X87_FCW_DEFAULT 0x0040
104
#define AP_INIT_CR0_DEFAULT 0x60000010
105
#define AP_INIT_MXCSR_DEFAULT 0x1f80
106
107
static const char * const sev_status_feat_names[] = {
108
[MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
109
[MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
110
[MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
111
[MSR_AMD64_SNP_VTOM_BIT] = "vTom",
112
[MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
113
[MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
114
[MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
115
[MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
116
[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
117
[MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
118
[MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
119
[MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
120
[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
121
[MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
122
[MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
123
[MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
124
[MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC",
125
};
126
127
/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs' VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
132
static u64 snp_tsc_scale __ro_after_init;
133
static u64 snp_tsc_offset __ro_after_init;
134
static unsigned long snp_tsc_freq_khz __ro_after_init;
135
136
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
137
DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
138
139
/*
140
* SVSM related information:
141
* When running under an SVSM, the VMPL that Linux is executing at must be
142
* non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
143
*/
144
u8 snp_vmpl __ro_after_init;
145
EXPORT_SYMBOL_GPL(snp_vmpl);
146
SYM_PIC_ALIAS(snp_vmpl);
147
148
/*
 * Since feature-negotiation-related variables are set early in the boot
 * process, they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
155
u16 ghcb_version __ro_after_init;
156
SYM_PIC_ALIAS(ghcb_version);
157
158
/* For early boot hypervisor communication in SEV-ES enabled guests */
159
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
160
161
/*
 * Needs to be in the .data section because we need it to be NULL before
 * the .bss section is cleared.
 */
165
struct ghcb *boot_ghcb __section(".data");
166
167
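/*
 * Read the AP jump table address from the OS area of the SNP secrets
 * page, which is mapped encrypted for the lookup and unmapped again
 * before returning.
 */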
static u64 __init get_snp_jump_table_addr(void)
168
{
169
struct snp_secrets_page *secrets;
170
void __iomem *mem;
171
u64 addr;
172
173
mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
174
if (!mem) {
175
pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
176
return 0;
177
}
178
179
secrets = (__force struct snp_secrets_page *)mem;
180
181
addr = secrets->os_area.ap_jump_table_pa;
182
iounmap(mem);
183
184
return addr;
185
}
186
187
static u64 __init get_jump_table_addr(void)
188
{
189
struct ghcb_state state;
190
unsigned long flags;
191
struct ghcb *ghcb;
192
u64 ret = 0;
193
194
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
195
return get_snp_jump_table_addr();
196
197
local_irq_save(flags);
198
199
ghcb = __sev_get_ghcb(&state);
200
201
vc_ghcb_invalidate(ghcb);
202
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
203
ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
204
ghcb_set_sw_exit_info_2(ghcb, 0);
205
206
sev_es_wr_ghcb_msr(__pa(ghcb));
207
VMGEXIT();
208
209
if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
210
ghcb_sw_exit_info_2_is_valid(ghcb))
211
ret = ghcb->save.sw_exit_info_2;
212
213
__sev_put_ghcb(&state);
214
215
local_irq_restore(flags);
216
217
return ret;
218
}
219
220
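/*
 * Issue an SVSM call using the GHCB-based protocol: the GHCB is set up
 * with the SVM_VMGEXIT_SNP_RUN_VMPL exit code, the call is issued, and
 * any resulting exception information is verified and, if needed,
 * forwarded before the SVSM result codes are processed.
 */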
static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
221
{
222
struct es_em_ctxt ctxt;
223
u8 pending = 0;
224
225
vc_ghcb_invalidate(ghcb);
226
227
/*
228
* Fill in protocol and format specifiers. This can be called very early
229
* in the boot, so use rip-relative references as needed.
230
*/
231
ghcb->protocol_version = ghcb_version;
232
ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
233
234
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
235
ghcb_set_sw_exit_info_1(ghcb, 0);
236
ghcb_set_sw_exit_info_2(ghcb, 0);
237
238
sev_es_wr_ghcb_msr(__pa(ghcb));
239
240
svsm_issue_call(call, &pending);
241
242
if (pending)
243
return -EINVAL;
244
245
switch (verify_exception_info(ghcb, &ctxt)) {
246
case ES_OK:
247
break;
248
case ES_EXCEPTION:
249
vc_forward_exception(&ctxt);
250
fallthrough;
251
default:
252
return -EINVAL;
253
}
254
255
return svsm_process_result_codes(call);
256
}
257
258
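/*
 * Issue an SVSM call using the best protocol available at this point in
 * boot: the per-CPU GHCB once GHCBs are initialized, the boot GHCB if it
 * is already set up, or the MSR-based protocol otherwise. The call is
 * retried while it reports -EAGAIN.
 */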
static int svsm_perform_call_protocol(struct svsm_call *call)
259
{
260
struct ghcb_state state;
261
unsigned long flags;
262
struct ghcb *ghcb;
263
int ret;
264
265
flags = native_local_irq_save();
266
267
if (sev_cfg.ghcbs_initialized)
268
ghcb = __sev_get_ghcb(&state);
269
else if (boot_ghcb)
270
ghcb = boot_ghcb;
271
else
272
ghcb = NULL;
273
274
do {
275
ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
276
: __pi_svsm_perform_msr_protocol(call);
277
} while (ret == -EAGAIN);
278
279
if (sev_cfg.ghcbs_initialized)
280
__sev_put_ghcb(&state);
281
282
native_local_irq_restore(flags);
283
284
return ret;
285
}
286
287
static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
288
int ret, u64 svsm_ret)
289
{
290
WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
291
pfn, action, page_size, ret, svsm_ret);
292
293
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
294
}
295
296
static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
297
{
298
unsigned int page_size;
299
bool action;
300
u64 pfn;
301
302
pfn = pc->entry[pc->cur_index].pfn;
303
action = pc->entry[pc->cur_index].action;
304
page_size = pc->entry[pc->cur_index].page_size;
305
306
__pval_terminate(pfn, action, page_size, ret, svsm_ret);
307
}
308
309
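/*
 * Validate or rescind the pages described by the PSC descriptor directly
 * with the PVALIDATE instruction (no SVSM involved). A 2M entry that
 * fails with a size mismatch is retried as individual 4K pages; any
 * other failure terminates the guest.
 */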
static void pval_pages(struct snp_psc_desc *desc)
310
{
311
struct psc_entry *e;
312
unsigned long vaddr;
313
unsigned int size;
314
unsigned int i;
315
bool validate;
316
u64 pfn;
317
int rc;
318
319
for (i = 0; i <= desc->hdr.end_entry; i++) {
320
e = &desc->entries[i];
321
322
pfn = e->gfn;
323
vaddr = (unsigned long)pfn_to_kaddr(pfn);
324
size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
325
validate = e->operation == SNP_PAGE_STATE_PRIVATE;
326
327
rc = pvalidate(vaddr, size, validate);
328
if (!rc)
329
continue;
330
331
if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
332
unsigned long vaddr_end = vaddr + PMD_SIZE;
333
334
for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
335
rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
336
if (rc)
337
__pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
338
}
339
} else {
340
__pval_terminate(pfn, validate, size, rc, 0);
341
}
342
}
343
}
344
345
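/*
 * Fill the SVSM calling area with 4K PVALIDATE entries for the given PFN
 * range, stopping when the CA is full (SVSM_PVALIDATE_MAX_COUNT) or the
 * end of the range is reached. Returns the first PFN not yet added.
 */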
static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
346
struct svsm_pvalidate_call *pc)
347
{
348
struct svsm_pvalidate_entry *pe;
349
350
/* Nothing in the CA yet */
351
pc->num_entries = 0;
352
pc->cur_index = 0;
353
354
pe = &pc->entry[0];
355
356
while (pfn < pfn_end) {
357
pe->page_size = RMP_PG_SIZE_4K;
358
pe->action = action;
359
pe->ignore_cf = 0;
360
pe->rsvd = 0;
361
pe->pfn = pfn;
362
363
pe++;
364
pfn++;
365
366
pc->num_entries++;
367
if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
368
break;
369
}
370
371
return pfn;
372
}
373
374
static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
375
struct svsm_pvalidate_call *pc)
376
{
377
struct svsm_pvalidate_entry *pe;
378
struct psc_entry *e;
379
380
/* Nothing in the CA yet */
381
pc->num_entries = 0;
382
pc->cur_index = 0;
383
384
pe = &pc->entry[0];
385
e = &desc->entries[desc_entry];
386
387
while (desc_entry <= desc->hdr.end_entry) {
388
pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
389
pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
390
pe->ignore_cf = 0;
391
pe->rsvd = 0;
392
pe->pfn = e->gfn;
393
394
pe++;
395
e++;
396
397
desc_entry++;
398
pc->num_entries++;
399
if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
400
break;
401
}
402
403
return desc_entry;
404
}
405
406
static void svsm_pval_pages(struct snp_psc_desc *desc)
407
{
408
struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
409
unsigned int i, pv_4k_count = 0;
410
struct svsm_pvalidate_call *pc;
411
struct svsm_call call = {};
412
unsigned long flags;
413
bool action;
414
u64 pc_pa;
415
int ret;
416
417
/*
418
* This can be called very early in the boot, use native functions in
419
* order to avoid paravirt issues.
420
*/
421
flags = native_local_irq_save();
422
423
/*
424
* The SVSM calling area (CA) can support processing 510 entries at a
425
* time. Loop through the Page State Change descriptor until the CA is
426
* full or the last entry in the descriptor is reached, at which time
427
* the SVSM is invoked. This repeats until all entries in the descriptor
428
* are processed.
429
*/
430
call.caa = svsm_get_caa();
431
432
pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
433
pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
434
435
/* Protocol 0, Call ID 1 */
436
call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
437
call.rcx = pc_pa;
438
439
for (i = 0; i <= desc->hdr.end_entry;) {
440
i = svsm_build_ca_from_psc_desc(desc, i, pc);
441
442
do {
443
ret = svsm_perform_call_protocol(&call);
444
if (!ret)
445
continue;
446
447
/*
448
* Check if the entry failed because of an RMP mismatch (a
449
* PVALIDATE at 2M was requested, but the page is mapped in
450
* the RMP as 4K).
451
*/
452
453
if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
454
pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
455
/* Save this entry for post-processing at 4K */
456
pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
457
458
/* Skip to the next one unless at the end of the list */
459
pc->cur_index++;
460
if (pc->cur_index < pc->num_entries)
461
ret = -EAGAIN;
462
else
463
ret = 0;
464
}
465
} while (ret == -EAGAIN);
466
467
if (ret)
468
svsm_pval_terminate(pc, ret, call.rax_out);
469
}
470
471
/* Process any entries that failed to be validated at 2M and validate them at 4K */
472
for (i = 0; i < pv_4k_count; i++) {
473
u64 pfn, pfn_end;
474
475
action = pv_4k[i].action;
476
pfn = pv_4k[i].pfn;
477
pfn_end = pfn + 512;
478
479
while (pfn < pfn_end) {
480
pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
481
482
ret = svsm_perform_call_protocol(&call);
483
if (ret)
484
svsm_pval_terminate(pc, ret, call.rax_out);
485
}
486
}
487
488
native_local_irq_restore(flags);
489
}
490
491
static void pvalidate_pages(struct snp_psc_desc *desc)
492
{
493
struct psc_entry *e;
494
unsigned int i;
495
496
if (snp_vmpl)
497
svsm_pval_pages(desc);
498
else
499
pval_pages(desc);
500
501
/*
502
* If not affected by the cache-coherency vulnerability there is no need
503
* to perform the cache eviction mitigation.
504
*/
505
if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO))
506
return;
507
508
for (i = 0; i <= desc->hdr.end_entry; i++) {
509
e = &desc->entries[i];
510
511
/*
512
* If validating memory (making it private) perform the cache
513
* eviction mitigation.
514
*/
515
if (e->operation == SNP_PAGE_STATE_PRIVATE)
516
sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1);
517
}
518
}
519
520
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
521
{
522
int cur_entry, end_entry, ret = 0;
523
struct snp_psc_desc *data;
524
struct es_em_ctxt ctxt;
525
526
vc_ghcb_invalidate(ghcb);
527
528
/* Copy the input desc into GHCB shared buffer */
529
data = (struct snp_psc_desc *)ghcb->shared_buffer;
530
memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
531
532
/*
533
* As per the GHCB specification, the hypervisor can resume the guest
534
* before processing all the entries. Check whether all the entries
535
* are processed. If not, then keep retrying. Note, the hypervisor
536
* will update the data memory directly to indicate the status, so
537
* reference the data->hdr everywhere.
538
*
539
* The strategy here is to wait for the hypervisor to change the page
540
* state in the RMP table before guest accesses the memory pages. If the
541
* page state change was not successful, then later memory access will
542
* result in a crash.
543
*/
544
cur_entry = data->hdr.cur_entry;
545
end_entry = data->hdr.end_entry;
546
547
while (data->hdr.cur_entry <= data->hdr.end_entry) {
548
ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
549
550
/* This will advance the shared buffer that 'data' points to. */
551
ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
552
553
/*
554
* Page State Change VMGEXIT can pass error code through
555
* exit_info_2.
556
*/
557
if (WARN(ret || ghcb->save.sw_exit_info_2,
558
"SNP: PSC failed ret=%d exit_info_2=%llx\n",
559
ret, ghcb->save.sw_exit_info_2)) {
560
ret = 1;
561
goto out;
562
}
563
564
/* Verify that reserved bit is not set */
565
if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
566
ret = 1;
567
goto out;
568
}
569
570
/*
 * Sanity check that entry processing is not going backwards.
 * This will happen only if the hypervisor is tricking us.
 */
574
if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
575
"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
576
end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
577
ret = 1;
578
goto out;
579
}
580
}
581
582
out:
583
return ret;
584
}
585
586
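/*
 * Build a PSC descriptor for as much of [vaddr, vaddr_end) as fits,
 * preferring 2M entries for suitably aligned direct-map ranges, then
 * invoke the hypervisor to perform the page state change. Validation is
 * rescinded before converting to shared and performed after converting
 * to private. Returns the first address not covered by this descriptor.
 */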
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
587
unsigned long vaddr_end, int op)
588
{
589
struct ghcb_state state;
590
bool use_large_entry;
591
struct psc_hdr *hdr;
592
struct psc_entry *e;
593
unsigned long flags;
594
unsigned long pfn;
595
struct ghcb *ghcb;
596
int i;
597
598
hdr = &data->hdr;
599
e = data->entries;
600
601
memset(data, 0, sizeof(*data));
602
i = 0;
603
604
while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
605
hdr->end_entry = i;
606
607
if (is_vmalloc_addr((void *)vaddr)) {
608
pfn = vmalloc_to_pfn((void *)vaddr);
609
use_large_entry = false;
610
} else {
611
pfn = __pa(vaddr) >> PAGE_SHIFT;
612
use_large_entry = true;
613
}
614
615
e->gfn = pfn;
616
e->operation = op;
617
618
if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
619
(vaddr_end - vaddr) >= PMD_SIZE) {
620
e->pagesize = RMP_PG_SIZE_2M;
621
vaddr += PMD_SIZE;
622
} else {
623
e->pagesize = RMP_PG_SIZE_4K;
624
vaddr += PAGE_SIZE;
625
}
626
627
e++;
628
i++;
629
}
630
631
/* Page validation must be rescinded before changing to shared */
632
if (op == SNP_PAGE_STATE_SHARED)
633
pvalidate_pages(data);
634
635
local_irq_save(flags);
636
637
if (sev_cfg.ghcbs_initialized)
638
ghcb = __sev_get_ghcb(&state);
639
else
640
ghcb = boot_ghcb;
641
642
/* Invoke the hypervisor to perform the page state changes */
643
if (!ghcb || vmgexit_psc(ghcb, data))
644
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
645
646
if (sev_cfg.ghcbs_initialized)
647
__sev_put_ghcb(&state);
648
649
local_irq_restore(flags);
650
651
/* Page validation must be performed after changing to private */
652
if (op == SNP_PAGE_STATE_PRIVATE)
653
pvalidate_pages(data);
654
655
return vaddr;
656
}
657
658
static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
659
{
660
struct snp_psc_desc desc;
661
unsigned long vaddr_end;
662
663
/* Use the MSR protocol when a GHCB is not available. */
664
if (!boot_ghcb) {
665
struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() };
666
667
return early_set_pages_state(vaddr, __pa(vaddr), npages, &d);
668
}
669
670
vaddr = vaddr & PAGE_MASK;
671
vaddr_end = vaddr + (npages << PAGE_SHIFT);
672
673
while (vaddr < vaddr_end)
674
vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
675
}
676
677
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
678
{
679
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
680
return;
681
682
set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
683
}
684
685
void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
686
{
687
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
688
return;
689
690
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
691
}
692
693
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
694
{
695
unsigned long vaddr, npages;
696
697
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
698
return;
699
700
vaddr = (unsigned long)__va(start);
701
npages = (end - start) >> PAGE_SHIFT;
702
703
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
704
}
705
706
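/*
 * Issue the AP Creation NAE event for the given APIC ID. For create
 * events the AP's SEV features are passed in RAX; the result is checked
 * via sw_exit_info_1.
 */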
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
707
{
708
bool create = event != SVM_VMGEXIT_AP_DESTROY;
709
struct ghcb_state state;
710
unsigned long flags;
711
struct ghcb *ghcb;
712
int ret = 0;
713
714
local_irq_save(flags);
715
716
ghcb = __sev_get_ghcb(&state);
717
718
vc_ghcb_invalidate(ghcb);
719
720
if (create)
721
ghcb_set_rax(ghcb, vmsa->sev_features);
722
723
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
724
ghcb_set_sw_exit_info_1(ghcb,
725
((u64)apic_id << 32) |
726
((u64)snp_vmpl << 16) |
727
event);
728
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
729
730
sev_es_wr_ghcb_msr(__pa(ghcb));
731
VMGEXIT();
732
733
if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
734
lower_32_bits(ghcb->save.sw_exit_info_1)) {
735
pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
736
ret = -EINVAL;
737
}
738
739
__sev_put_ghcb(&state);
740
741
local_irq_restore(flags);
742
743
return ret;
744
}
745
746
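/*
 * Set or clear the VMSA attribute of a page. When running under an SVSM
 * this is done with the SVSM Create/Delete vCPU calls, otherwise directly
 * with RMPADJUST targeting VMPL1.
 */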
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
747
{
748
int ret;
749
750
if (snp_vmpl) {
751
struct svsm_call call = {};
752
unsigned long flags;
753
754
local_irq_save(flags);
755
756
call.caa = this_cpu_read(svsm_caa);
757
call.rcx = __pa(va);
758
759
if (make_vmsa) {
760
/* Protocol 0, Call ID 2 */
761
call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
762
call.rdx = __pa(caa);
763
call.r8 = apic_id;
764
} else {
765
/* Protocol 0, Call ID 3 */
766
call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
767
}
768
769
ret = svsm_perform_call_protocol(&call);
770
771
local_irq_restore(flags);
772
} else {
773
/*
774
* If the kernel runs at VMPL0, it can change the VMSA
775
* bit for a page using the RMPADJUST instruction.
776
* However, for the instruction to succeed it must
777
* target the permissions of a lesser privileged (higher
778
* numbered) VMPL level, so use VMPL1.
779
*/
780
u64 attrs = 1;
781
782
if (make_vmsa)
783
attrs |= RMPADJUST_VMSA_PAGE_BIT;
784
785
ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
786
}
787
788
return ret;
789
}
790
791
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
792
{
793
int err;
794
795
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
796
if (err)
797
pr_err("clear VMSA page failed (%u), leaking page\n", err);
798
else
799
free_page((unsigned long)vmsa);
800
}
801
802
static void set_pte_enc(pte_t *kpte, int level, void *va)
803
{
804
struct pte_enc_desc d = {
805
.kpte = kpte,
806
.pte_level = level,
807
.va = va,
808
.encrypt = true
809
};
810
811
prepare_pte_enc(&d);
812
set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
813
}
814
815
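/*
 * Convert all shared (decrypted) mappings back to private in preparation
 * for kexec: walk the direct map, skipping pages that contain a per-CPU
 * GHCB (those are converted later), then convert the .bss..decrypted
 * region and flush the TLB.
 */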
static void unshare_all_memory(void)
816
{
817
unsigned long addr, end, size, ghcb;
818
struct sev_es_runtime_data *data;
819
unsigned int npages, level;
820
bool skipped_addr;
821
pte_t *pte;
822
int cpu;
823
824
/* Unshare the direct mapping. */
825
addr = PAGE_OFFSET;
826
end = PAGE_OFFSET + get_max_mapped();
827
828
while (addr < end) {
829
pte = lookup_address(addr, &level);
830
size = page_level_size(level);
831
npages = size / PAGE_SIZE;
832
skipped_addr = false;
833
834
if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
835
addr += size;
836
continue;
837
}
838
839
/*
840
* Ensure that all the per-CPU GHCBs are made private at the
841
* end of the unsharing loop so that the switch to the slower
842
* MSR protocol happens last.
843
*/
844
for_each_possible_cpu(cpu) {
845
data = per_cpu(runtime_data, cpu);
846
ghcb = (unsigned long)&data->ghcb_page;
847
848
/* Handle the case of a huge page containing the GHCB page */
849
if (addr <= ghcb && ghcb < addr + size) {
850
skipped_addr = true;
851
break;
852
}
853
}
854
855
if (!skipped_addr) {
856
set_pte_enc(pte, level, (void *)addr);
857
snp_set_memory_private(addr, npages);
858
}
859
addr += size;
860
}
861
862
/* Unshare all bss decrypted memory. */
863
addr = (unsigned long)__start_bss_decrypted;
864
end = (unsigned long)__start_bss_decrypted_unused;
865
npages = (end - addr) >> PAGE_SHIFT;
866
867
for (; addr < end; addr += PAGE_SIZE) {
868
pte = lookup_address(addr, &level);
869
if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
870
continue;
871
872
set_pte_enc(pte, level, (void *)addr);
873
}
874
addr = (unsigned long)__start_bss_decrypted;
875
snp_set_memory_private(addr, npages);
876
877
__flush_tlb_all();
878
}
879
880
/* Stop new private<->shared conversions */
881
void snp_kexec_begin(void)
882
{
883
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
884
return;
885
886
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
887
return;
888
889
/*
890
* Crash kernel ends up here with interrupts disabled: can't wait for
891
* conversions to finish.
892
*
893
* If race happened, just report and proceed.
894
*/
895
if (!set_memory_enc_stop_conversion())
896
pr_warn("Failed to stop shared<->private conversions\n");
897
}
898
899
/*
 * Shut down all APs except the one handling kexec/kdump, and clear
 * the VMSA tag on the APs' VMSA pages since they are no longer used
 * as VMSA pages.
 */
904
static void shutdown_all_aps(void)
905
{
906
struct sev_es_save_area *vmsa;
907
int apic_id, this_cpu, cpu;
908
909
this_cpu = get_cpu();
910
911
/*
912
* APs are already in HLT loop when enc_kexec_finish() callback
913
* is invoked.
914
*/
915
for_each_present_cpu(cpu) {
916
vmsa = per_cpu(sev_vmsa, cpu);
917
918
/*
919
* The BSP or offlined APs do not have guest allocated VMSA
920
* and there is no need to clear the VMSA tag for this page.
921
*/
922
if (!vmsa)
923
continue;
924
925
/*
926
* Cannot clear the VMSA tag for the currently running vCPU.
927
*/
928
if (this_cpu == cpu) {
929
unsigned long pa;
930
struct page *p;
931
932
pa = __pa(vmsa);
933
/*
 * Mark the VMSA page of the running vCPU as offline
 * so that it is excluded and not touched by makedumpfile
 * while generating the vmcore during kdump.
 */
938
p = pfn_to_online_page(pa >> PAGE_SHIFT);
939
if (p)
940
__SetPageOffline(p);
941
continue;
942
}
943
944
apic_id = cpuid_to_apicid[cpu];
945
946
/*
 * Issue AP destroy to ensure the AP gets kicked out of guest mode
 * so that RMPADJUST can be used to remove the VMSA tag on its
 * VMSA page.
 */
951
vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
952
snp_cleanup_vmsa(vmsa, apic_id);
953
}
954
955
put_cpu();
956
}
957
958
void snp_kexec_finish(void)
959
{
960
struct sev_es_runtime_data *data;
961
unsigned long size, addr;
962
unsigned int level, cpu;
963
struct ghcb *ghcb;
964
pte_t *pte;
965
966
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
967
return;
968
969
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
970
return;
971
972
shutdown_all_aps();
973
974
unshare_all_memory();
975
976
/*
977
* Switch to using the MSR protocol to change per-CPU GHCBs to
978
* private. All the per-CPU GHCBs have been switched back to private,
979
* so can't do any more GHCB calls to the hypervisor beyond this point
980
* until the kexec'ed kernel starts running.
981
*/
982
boot_ghcb = NULL;
983
sev_cfg.ghcbs_initialized = false;
984
985
for_each_possible_cpu(cpu) {
986
data = per_cpu(runtime_data, cpu);
987
ghcb = &data->ghcb_page;
988
pte = lookup_address((unsigned long)ghcb, &level);
989
size = page_level_size(level);
990
/* Handle the case of a huge page containing the GHCB page */
991
addr = (unsigned long)ghcb & page_level_mask(level);
992
set_pte_enc(pte, level, (void *)addr);
993
snp_set_memory_private(addr, (size / PAGE_SIZE));
994
}
995
}
996
997
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
998
#define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
999
#define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
1000
1001
#define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
1002
#define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
1003
1004
static void *snp_alloc_vmsa_page(int cpu)
1005
{
1006
struct page *p;
1007
1008
/*
 * Allocate a VMSA page to work around the SNP erratum where the CPU will
 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
 * collides with the RMP entry of the VMSA page. The recommended workaround
 * is to not use a large page.
 *
 * Allocate an 8k page which is also 8k-aligned.
 */
1016
p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
1017
if (!p)
1018
return NULL;
1019
1020
split_page(p, 1);
1021
1022
/* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
1023
__free_page(p);
1024
1025
return page_address(p + 1);
1026
}
1027
1028
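/*
 * Bring up an AP via the SNP AP Creation protocol: allocate a fresh VMSA,
 * initialize it with the documented AP INIT state and a CS:RIP derived
 * from the SEV-ES trampoline, mark the page as a VMSA and issue the
 * AP Create event for the target APIC ID.
 */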
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
1029
{
1030
struct sev_es_save_area *cur_vmsa, *vmsa;
1031
struct svsm_ca *caa;
1032
u8 sipi_vector;
1033
int ret;
1034
u64 cr4;
1035
1036
/*
1037
* The hypervisor SNP feature support check has happened earlier, just check
1038
* the AP_CREATION one here.
1039
*/
1040
if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
1041
return -EOPNOTSUPP;
1042
1043
/*
1044
* Verify the desired start IP against the known trampoline start IP
1045
* to catch any future new trampolines that may be introduced that
1046
* would require a new protected guest entry point.
1047
*/
1048
if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
1049
"Unsupported SNP start_ip: %lx\n", start_ip))
1050
return -EINVAL;
1051
1052
/* Override start_ip with known protected guest start IP */
1053
start_ip = real_mode_header->sev_es_trampoline_start;
1054
cur_vmsa = per_cpu(sev_vmsa, cpu);
1055
1056
/*
 * A new VMSA is created each time because there is no guarantee that
 * the current VMSA is the kernel's or that the vCPU is not running. If
 * an attempt were made to use the current VMSA with a running vCPU, a
 * #VMEXIT of that vCPU would wipe out all of the settings being done
 * here.
 */
1063
vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
1064
if (!vmsa)
1065
return -ENOMEM;
1066
1067
/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
1068
caa = per_cpu(svsm_caa, cpu);
1069
1070
/* CR4 should maintain the MCE value */
1071
cr4 = native_read_cr4() & X86_CR4_MCE;
1072
1073
/* Set the CS value based on the start_ip converted to a SIPI vector */
1074
sipi_vector = (start_ip >> 12);
1075
vmsa->cs.base = sipi_vector << 12;
1076
vmsa->cs.limit = AP_INIT_CS_LIMIT;
1077
vmsa->cs.attrib = INIT_CS_ATTRIBS;
1078
vmsa->cs.selector = sipi_vector << 8;
1079
1080
/* Set the RIP value based on start_ip */
1081
vmsa->rip = start_ip & 0xfff;
1082
1083
/* Set AP INIT defaults as documented in the APM */
1084
vmsa->ds.limit = AP_INIT_DS_LIMIT;
1085
vmsa->ds.attrib = INIT_DS_ATTRIBS;
1086
vmsa->es = vmsa->ds;
1087
vmsa->fs = vmsa->ds;
1088
vmsa->gs = vmsa->ds;
1089
vmsa->ss = vmsa->ds;
1090
1091
vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
1092
vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
1093
vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
1094
vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
1095
vmsa->tr.limit = AP_INIT_TR_LIMIT;
1096
vmsa->tr.attrib = INIT_TR_ATTRIBS;
1097
1098
vmsa->cr4 = cr4;
1099
vmsa->cr0 = AP_INIT_CR0_DEFAULT;
1100
vmsa->dr7 = DR7_RESET_VALUE;
1101
vmsa->dr6 = AP_INIT_DR6_DEFAULT;
1102
vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
1103
vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
1104
vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
1105
vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
1106
vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
1107
vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
1108
1109
if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
1110
vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK;
1111
1112
/* SVME must be set. */
1113
vmsa->efer = EFER_SVME;
1114
1115
/*
1116
* Set the SNP-specific fields for this VMSA:
1117
* VMPL level
1118
* SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
1119
*/
1120
vmsa->vmpl = snp_vmpl;
1121
vmsa->sev_features = sev_status >> 2;
1122
1123
/* Populate AP's TSC scale/offset to get accurate TSC values. */
1124
if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
1125
vmsa->tsc_scale = snp_tsc_scale;
1126
vmsa->tsc_offset = snp_tsc_offset;
1127
}
1128
1129
/* Switch the page over to a VMSA page now that it is initialized */
1130
ret = snp_set_vmsa(vmsa, caa, apic_id, true);
1131
if (ret) {
1132
pr_err("set VMSA page failed (%u)\n", ret);
1133
free_page((unsigned long)vmsa);
1134
1135
return -EINVAL;
1136
}
1137
1138
/* Issue VMGEXIT AP Creation NAE event */
1139
ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
1140
if (ret) {
1141
snp_cleanup_vmsa(vmsa, apic_id);
1142
vmsa = NULL;
1143
}
1144
1145
/* Free up any previous VMSA page */
1146
if (cur_vmsa)
1147
snp_cleanup_vmsa(cur_vmsa, apic_id);
1148
1149
/* Record the current VMSA page */
1150
per_cpu(sev_vmsa, cpu) = vmsa;
1151
1152
return ret;
1153
}
1154
1155
void __init snp_set_wakeup_secondary_cpu(void)
1156
{
1157
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1158
return;
1159
1160
/*
1161
* Always set this override if SNP is enabled. This makes it the
1162
* required method to start APs under SNP. If the hypervisor does
1163
* not support AP creation, then no APs will be started.
1164
*/
1165
apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
1166
}
1167
1168
int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
1169
{
1170
u16 startup_cs, startup_ip;
1171
phys_addr_t jump_table_pa;
1172
u64 jump_table_addr;
1173
u16 __iomem *jump_table;
1174
1175
jump_table_addr = get_jump_table_addr();
1176
1177
/* On UP guests there is no jump table so this is not a failure */
1178
if (!jump_table_addr)
1179
return 0;
1180
1181
/* Check if AP Jump Table is page-aligned */
1182
if (jump_table_addr & ~PAGE_MASK)
1183
return -EINVAL;
1184
1185
jump_table_pa = jump_table_addr & PAGE_MASK;
1186
1187
startup_cs = (u16)(rmh->trampoline_start >> 4);
1188
startup_ip = (u16)(rmh->sev_es_trampoline_start -
1189
rmh->trampoline_start);
1190
1191
jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
1192
if (!jump_table)
1193
return -EIO;
1194
1195
writew(startup_ip, &jump_table[0]);
1196
writew(startup_cs, &jump_table[1]);
1197
1198
iounmap(jump_table);
1199
1200
return 0;
1201
}
1202
1203
/*
1204
* This is needed by the OVMF UEFI firmware which will use whatever it finds in
1205
* the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
1206
* runtime GHCBs used by the kernel are also mapped in the EFI page-table.
1207
*
1208
* When running under SVSM the CA page is needed too, so map it as well.
1209
*/
1210
int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
1211
{
1212
unsigned long address, pflags, pflags_enc;
1213
struct sev_es_runtime_data *data;
1214
int cpu;
1215
u64 pfn;
1216
1217
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1218
return 0;
1219
1220
pflags = _PAGE_NX | _PAGE_RW;
1221
pflags_enc = cc_mkenc(pflags);
1222
1223
for_each_possible_cpu(cpu) {
1224
data = per_cpu(runtime_data, cpu);
1225
1226
address = __pa(&data->ghcb_page);
1227
pfn = address >> PAGE_SHIFT;
1228
1229
if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
1230
return 1;
1231
1232
if (snp_vmpl) {
1233
address = per_cpu(svsm_caa_pa, cpu);
1234
if (!address)
1235
return 1;
1236
1237
pfn = address >> PAGE_SHIFT;
1238
if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
1239
return 1;
1240
}
1241
}
1242
1243
return 0;
1244
}
1245
1246
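/*
 * Secure AVIC helpers: proxy APIC register reads/writes through the GHCB
 * MSR emulation path. Any failure is treated as fatal and terminates the
 * guest.
 */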
u64 savic_ghcb_msr_read(u32 reg)
1247
{
1248
u64 msr = APIC_BASE_MSR + (reg >> 4);
1249
struct pt_regs regs = { .cx = msr };
1250
struct es_em_ctxt ctxt = { .regs = &regs };
1251
struct ghcb_state state;
1252
enum es_result res;
1253
struct ghcb *ghcb;
1254
1255
guard(irqsave)();
1256
1257
ghcb = __sev_get_ghcb(&state);
1258
vc_ghcb_invalidate(ghcb);
1259
1260
res = sev_es_ghcb_handle_msr(ghcb, &ctxt, false);
1261
if (res != ES_OK) {
1262
pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res);
1263
/* MSR read failures are treated as fatal errors */
1264
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
1265
}
1266
1267
__sev_put_ghcb(&state);
1268
1269
return regs.ax | regs.dx << 32;
1270
}
1271
1272
void savic_ghcb_msr_write(u32 reg, u64 value)
1273
{
1274
u64 msr = APIC_BASE_MSR + (reg >> 4);
1275
struct pt_regs regs = {
1276
.cx = msr,
1277
.ax = lower_32_bits(value),
1278
.dx = upper_32_bits(value)
1279
};
1280
struct es_em_ctxt ctxt = { .regs = &regs };
1281
struct ghcb_state state;
1282
enum es_result res;
1283
struct ghcb *ghcb;
1284
1285
guard(irqsave)();
1286
1287
ghcb = __sev_get_ghcb(&state);
1288
vc_ghcb_invalidate(ghcb);
1289
1290
res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true);
1291
if (res != ES_OK) {
1292
pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res);
1293
/* MSR writes should never fail. Any failure is fatal error for SNP guest */
1294
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
1295
}
1296
1297
__sev_put_ghcb(&state);
1298
}
1299
1300
enum es_result savic_register_gpa(u64 gpa)
1301
{
1302
struct ghcb_state state;
1303
struct es_em_ctxt ctxt;
1304
enum es_result res;
1305
struct ghcb *ghcb;
1306
1307
guard(irqsave)();
1308
1309
ghcb = __sev_get_ghcb(&state);
1310
vc_ghcb_invalidate(ghcb);
1311
1312
ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
1313
ghcb_set_rbx(ghcb, gpa);
1314
res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
1315
SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0);
1316
1317
__sev_put_ghcb(&state);
1318
1319
return res;
1320
}
1321
1322
enum es_result savic_unregister_gpa(u64 *gpa)
1323
{
1324
struct ghcb_state state;
1325
struct es_em_ctxt ctxt;
1326
enum es_result res;
1327
struct ghcb *ghcb;
1328
1329
guard(irqsave)();
1330
1331
ghcb = __sev_get_ghcb(&state);
1332
vc_ghcb_invalidate(ghcb);
1333
1334
ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
1335
res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
1336
SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0);
1337
if (gpa && res == ES_OK)
1338
*gpa = ghcb->save.rbx;
1339
1340
__sev_put_ghcb(&state);
1341
1342
return res;
1343
}
1344
1345
static void snp_register_per_cpu_ghcb(void)
1346
{
1347
struct sev_es_runtime_data *data;
1348
struct ghcb *ghcb;
1349
1350
data = this_cpu_read(runtime_data);
1351
ghcb = &data->ghcb_page;
1352
1353
snp_register_ghcb_early(__pa(ghcb));
1354
}
1355
1356
void setup_ghcb(void)
1357
{
1358
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1359
return;
1360
1361
/*
1362
* Check whether the runtime #VC exception handler is active. It uses
1363
* the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
1364
*
1365
* If SNP is active, register the per-CPU GHCB page so that the runtime
1366
* exception handler can use it.
1367
*/
1368
if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
1369
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1370
snp_register_per_cpu_ghcb();
1371
1372
sev_cfg.ghcbs_initialized = true;
1373
1374
return;
1375
}
1376
1377
/*
1378
* Make sure the hypervisor talks a supported protocol.
1379
* This gets called only in the BSP boot phase.
1380
*/
1381
if (!sev_es_negotiate_protocol())
1382
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1383
1384
/*
1385
* Clear the boot_ghcb. The first exception comes in before the bss
1386
* section is cleared.
1387
*/
1388
memset(&boot_ghcb_page, 0, PAGE_SIZE);
1389
1390
/* Alright - Make the boot-ghcb public */
1391
boot_ghcb = &boot_ghcb_page;
1392
1393
/* An SNP guest requires that the GHCB GPA be registered. */
1394
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1395
snp_register_ghcb_early(__pa(&boot_ghcb_page));
1396
}
1397
1398
#ifdef CONFIG_HOTPLUG_CPU
1399
static void sev_es_ap_hlt_loop(void)
1400
{
1401
struct ghcb_state state;
1402
struct ghcb *ghcb;
1403
1404
ghcb = __sev_get_ghcb(&state);
1405
1406
while (true) {
1407
vc_ghcb_invalidate(ghcb);
1408
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
1409
ghcb_set_sw_exit_info_1(ghcb, 0);
1410
ghcb_set_sw_exit_info_2(ghcb, 0);
1411
1412
sev_es_wr_ghcb_msr(__pa(ghcb));
1413
VMGEXIT();
1414
1415
/* Wakeup signal? */
1416
if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
1417
ghcb->save.sw_exit_info_2)
1418
break;
1419
}
1420
1421
__sev_put_ghcb(&state);
1422
}
1423
1424
/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver a SIPI request to restart the AP.
 * Instead, the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
1430
static void sev_es_play_dead(void)
1431
{
1432
play_dead_common();
1433
1434
/* IRQs now disabled */
1435
1436
sev_es_ap_hlt_loop();
1437
1438
/*
1439
* If we get here, the VCPU was woken up again. Jump to CPU
1440
* startup code to get it back online.
1441
*/
1442
soft_restart_cpu();
1443
}
1444
#else /* CONFIG_HOTPLUG_CPU */
1445
#define sev_es_play_dead native_play_dead
1446
#endif /* CONFIG_HOTPLUG_CPU */
1447
1448
#ifdef CONFIG_SMP
1449
static void __init sev_es_setup_play_dead(void)
1450
{
1451
smp_ops.play_dead = sev_es_play_dead;
1452
}
1453
#else
1454
static inline void sev_es_setup_play_dead(void) { }
1455
#endif
1456
1457
static void __init alloc_runtime_data(int cpu)
1458
{
1459
struct sev_es_runtime_data *data;
1460
1461
data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
1462
if (!data)
1463
panic("Can't allocate SEV-ES runtime data");
1464
1465
per_cpu(runtime_data, cpu) = data;
1466
1467
if (snp_vmpl) {
1468
struct svsm_ca *caa;
1469
1470
/* Allocate the SVSM CA page if an SVSM is present */
1471
caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE)
1472
: &boot_svsm_ca_page;
1473
1474
per_cpu(svsm_caa, cpu) = caa;
1475
per_cpu(svsm_caa_pa, cpu) = __pa(caa);
1476
}
1477
}
1478
1479
static void __init init_ghcb(int cpu)
1480
{
1481
struct sev_es_runtime_data *data;
1482
int err;
1483
1484
data = per_cpu(runtime_data, cpu);
1485
1486
err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
1487
sizeof(data->ghcb_page));
1488
if (err)
1489
panic("Can't map GHCBs unencrypted");
1490
1491
memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
1492
1493
data->ghcb_active = false;
1494
data->backup_ghcb_active = false;
1495
}
1496
1497
void __init sev_es_init_vc_handling(void)
1498
{
1499
int cpu;
1500
1501
BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
1502
1503
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1504
return;
1505
1506
if (!sev_es_check_cpu_features())
1507
panic("SEV-ES CPU Features missing");
1508
1509
/*
1510
* SNP is supported in v2 of the GHCB spec which mandates support for HV
1511
* features.
1512
*/
1513
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
1514
sev_hv_features = get_hv_features();
1515
1516
if (!(sev_hv_features & GHCB_HV_FT_SNP))
1517
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
1518
}
1519
1520
/* Initialize per-cpu GHCB pages */
1521
for_each_possible_cpu(cpu) {
1522
alloc_runtime_data(cpu);
1523
init_ghcb(cpu);
1524
}
1525
1526
if (snp_vmpl)
1527
sev_cfg.use_cas = true;
1528
1529
sev_es_setup_play_dead();
1530
1531
/* Secondary CPUs use the runtime #VC handler */
1532
initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
1533
}
1534
1535
/*
1536
* SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
1537
* enabled, as the alternative (fallback) logic for DMI probing in the legacy
1538
* ROM region can cause a crash since this region is not pre-validated.
1539
*/
1540
void __init snp_dmi_setup(void)
1541
{
1542
if (efi_enabled(EFI_CONFIG_TABLES))
1543
dmi_setup();
1544
}
1545
1546
static void dump_cpuid_table(void)
1547
{
1548
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1549
int i = 0;
1550
1551
pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
1552
cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
1553
1554
for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
1555
const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
1556
1557
pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
1558
i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
1559
fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
1560
}
1561
}
1562
1563
/*
1564
* It is useful from an auditing/testing perspective to provide an easy way
1565
* for the guest owner to know that the CPUID table has been initialized as
1566
* expected, but that initialization happens too early in boot to print any
1567
* sort of indicator, and there's not really any other good place to do it,
1568
* so do it here.
1569
*
1570
* If running as an SNP guest, report the current VM privilege level (VMPL).
1571
*/
1572
static int __init report_snp_info(void)
1573
{
1574
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1575
1576
if (cpuid_table->count) {
1577
pr_info("Using SNP CPUID table, %d entries present.\n",
1578
cpuid_table->count);
1579
1580
if (sev_cfg.debug)
1581
dump_cpuid_table();
1582
}
1583
1584
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1585
pr_info("SNP running at VMPL%u.\n", snp_vmpl);
1586
1587
return 0;
1588
}
1589
arch_initcall(report_snp_info);
1590
1591
static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
1592
{
1593
/* If (new) lengths have been returned, propagate them up */
1594
if (call->rcx_out != call->rcx)
1595
input->manifest_buf.len = call->rcx_out;
1596
1597
if (call->rdx_out != call->rdx)
1598
input->certificates_buf.len = call->rdx_out;
1599
1600
if (call->r8_out != call->r8)
1601
input->report_buf.len = call->r8_out;
1602
}
1603
1604
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
1605
struct svsm_attest_call *input)
1606
{
1607
struct svsm_attest_call *ac;
1608
unsigned long flags;
1609
u64 attest_call_pa;
1610
int ret;
1611
1612
if (!snp_vmpl)
1613
return -EINVAL;
1614
1615
local_irq_save(flags);
1616
1617
call->caa = svsm_get_caa();
1618
1619
ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
1620
attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
1621
1622
*ac = *input;
1623
1624
/*
1625
* Set input registers for the request and set RDX and R8 to known
1626
* values in order to detect length values being returned in them.
1627
*/
1628
call->rax = call_id;
1629
call->rcx = attest_call_pa;
1630
call->rdx = -1;
1631
call->r8 = -1;
1632
ret = svsm_perform_call_protocol(call);
1633
update_attest_input(call, input);
1634
1635
local_irq_restore(flags);
1636
1637
return ret;
1638
}
1639
EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
1640
1641
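/*
 * Issue an SNP guest request through the GHCB and translate the
 * hypervisor error codes: BUSY becomes -EAGAIN and INVALID_LEN becomes
 * -ENOSPC (with the expected number of pages returned in RBX for
 * extended requests); anything else is -EIO.
 */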
static int snp_issue_guest_request(struct snp_guest_req *req)
1642
{
1643
struct snp_req_data *input = &req->input;
1644
struct ghcb_state state;
1645
struct es_em_ctxt ctxt;
1646
unsigned long flags;
1647
struct ghcb *ghcb;
1648
int ret;
1649
1650
req->exitinfo2 = SEV_RET_NO_FW_CALL;
1651
1652
/*
1653
* __sev_get_ghcb() needs to run with IRQs disabled because it is using
1654
* a per-CPU GHCB.
1655
*/
1656
local_irq_save(flags);
1657
1658
ghcb = __sev_get_ghcb(&state);
1659
if (!ghcb) {
1660
ret = -EIO;
1661
goto e_restore_irq;
1662
}
1663
1664
vc_ghcb_invalidate(ghcb);
1665
1666
if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1667
ghcb_set_rax(ghcb, input->data_gpa);
1668
ghcb_set_rbx(ghcb, input->data_npages);
1669
}
1670
1671
ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
1672
if (ret)
1673
goto e_put;
1674
1675
req->exitinfo2 = ghcb->save.sw_exit_info_2;
1676
switch (req->exitinfo2) {
1677
case 0:
1678
break;
1679
1680
case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
1681
ret = -EAGAIN;
1682
break;
1683
1684
case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
1685
/* The number of expected pages is returned in RBX */
1686
if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1687
input->data_npages = ghcb_get_rbx(ghcb);
1688
ret = -ENOSPC;
1689
break;
1690
}
1691
fallthrough;
1692
default:
1693
ret = -EIO;
1694
break;
1695
}
1696
1697
e_put:
1698
__sev_put_ghcb(&state);
1699
e_restore_irq:
1700
local_irq_restore(flags);
1701
1702
return ret;
1703
}
1704
1705
/**
1706
* snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device
1707
*
1708
* Check that there is an SVSM and that it supports at least TPM_SEND_COMMAND,
* which is the only request used so far.
1710
*
1711
* Return: true if the platform provides a vTPM SVSM device, false otherwise.
1712
*/
1713
static bool snp_svsm_vtpm_probe(void)
1714
{
1715
struct svsm_call call = {};
1716
1717
/* The vTPM device is available only if a SVSM is present */
1718
if (!snp_vmpl)
1719
return false;
1720
1721
call.caa = svsm_get_caa();
1722
call.rax = SVSM_VTPM_CALL(SVSM_VTPM_QUERY);
1723
1724
if (svsm_perform_call_protocol(&call))
1725
return false;
1726
1727
/* Check platform commands contains TPM_SEND_COMMAND - platform command 8 */
1728
return call.rcx_out & BIT_ULL(8);
1729
}
1730
1731
/**
1732
* snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM
1733
* @buffer: A buffer used to both send the command and receive the response.
1734
*
1735
* Execute a SVSM_VTPM_CMD call as defined by
1736
* "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00
1737
*
1738
* All command request/response buffers have a common structure as specified by
1739
* the following table:
1740
* Byte      Size       In/Out    Description
* Offset    (Bytes)
*
* 0x000     4          In        Platform command
*                      Out       Platform command response size
*
1745
* Each command can build upon this common request/response structure to create
1746
* a structure specific to the command. See include/linux/tpm_svsm.h for more
1747
* details.
1748
*
1749
* Return: 0 on success, -errno on failure
1750
*/
1751
int snp_svsm_vtpm_send_command(u8 *buffer)
1752
{
1753
struct svsm_call call = {};
1754
1755
call.caa = svsm_get_caa();
1756
call.rax = SVSM_VTPM_CALL(SVSM_VTPM_CMD);
1757
call.rcx = __pa(buffer);
1758
1759
return svsm_perform_call_protocol(&call);
1760
}
1761
EXPORT_SYMBOL_GPL(snp_svsm_vtpm_send_command);
1762
1763
static struct platform_device sev_guest_device = {
1764
.name = "sev-guest",
1765
.id = -1,
1766
};
1767
1768
static struct platform_device tpm_svsm_device = {
1769
.name = "tpm-svsm",
1770
.id = -1,
1771
};
1772
1773
static int __init snp_init_platform_device(void)
1774
{
1775
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1776
return -ENODEV;
1777
1778
if (platform_device_register(&sev_guest_device))
1779
return -ENODEV;
1780
1781
if (snp_svsm_vtpm_probe() &&
1782
platform_device_register(&tpm_svsm_device))
1783
return -ENODEV;
1784
1785
pr_info("SNP guest platform devices initialized.\n");
1786
return 0;
1787
}
1788
device_initcall(snp_init_platform_device);
1789
1790
void sev_show_status(void)
1791
{
1792
int i;
1793
1794
pr_info("Status: ");
1795
for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
1796
if (sev_status & BIT_ULL(i)) {
1797
if (!sev_status_feat_names[i])
1798
continue;
1799
1800
pr_cont("%s ", sev_status_feat_names[i]);
1801
}
1802
}
1803
pr_cont("\n");
1804
}
1805
1806
#ifdef CONFIG_SYSFS
1807
static ssize_t vmpl_show(struct kobject *kobj,
1808
struct kobj_attribute *attr, char *buf)
1809
{
1810
return sysfs_emit(buf, "%d\n", snp_vmpl);
1811
}
1812
1813
static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
1814
1815
static struct attribute *vmpl_attrs[] = {
1816
&vmpl_attr.attr,
1817
NULL
1818
};
1819
1820
static struct attribute_group sev_attr_group = {
1821
.attrs = vmpl_attrs,
1822
};
1823
1824
static int __init sev_sysfs_init(void)
1825
{
1826
struct kobject *sev_kobj;
1827
struct device *dev_root;
1828
int ret;
1829
1830
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1831
return -ENODEV;
1832
1833
dev_root = bus_get_dev_root(&cpu_subsys);
1834
if (!dev_root)
1835
return -ENODEV;
1836
1837
sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
1838
put_device(dev_root);
1839
1840
if (!sev_kobj)
1841
return -ENOMEM;
1842
1843
ret = sysfs_create_group(sev_kobj, &sev_attr_group);
1844
if (ret)
1845
kobject_put(sev_kobj);
1846
1847
return ret;
1848
}
1849
arch_initcall(sev_sysfs_init);
1850
#endif // CONFIG_SYSFS
1851
1852
static void free_shared_pages(void *buf, size_t sz)
1853
{
1854
unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1855
int ret;
1856
1857
if (!buf)
1858
return;
1859
1860
ret = set_memory_encrypted((unsigned long)buf, npages);
1861
if (ret) {
1862
WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
1863
return;
1864
}
1865
1866
__free_pages(virt_to_page(buf), get_order(sz));
1867
}
1868
1869
static void *alloc_shared_pages(size_t sz)
1870
{
1871
unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1872
struct page *page;
1873
int ret;
1874
1875
page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
1876
if (!page)
1877
return NULL;
1878
1879
ret = set_memory_decrypted((unsigned long)page_address(page), npages);
1880
if (ret) {
1881
pr_err("failed to mark page shared, ret=%d\n", ret);
1882
__free_pages(page, get_order(sz));
1883
return NULL;
1884
}
1885
1886
return page_address(page);
1887
}
1888
1889
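/*
 * Return the VMPCK key for the requested VMPL and point @seqno at the
 * corresponding message sequence number slot in the secrets page, or
 * NULL for an invalid id.
 */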
static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
1890
{
1891
u8 *key = NULL;
1892
1893
switch (id) {
1894
case 0:
1895
*seqno = &secrets->os_area.msg_seqno_0;
1896
key = secrets->vmpck0;
1897
break;
1898
case 1:
1899
*seqno = &secrets->os_area.msg_seqno_1;
1900
key = secrets->vmpck1;
1901
break;
1902
case 2:
1903
*seqno = &secrets->os_area.msg_seqno_2;
1904
key = secrets->vmpck2;
1905
break;
1906
case 3:
1907
*seqno = &secrets->os_area.msg_seqno_3;
1908
key = secrets->vmpck3;
1909
break;
1910
default:
1911
break;
1912
}
1913
1914
return key;
1915
}
1916
1917
static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
1918
{
1919
struct aesgcm_ctx *ctx;
1920
1921
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1922
if (!ctx)
1923
return NULL;
1924
1925
if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
1926
pr_err("Crypto context initialization failed\n");
1927
kfree(ctx);
1928
return NULL;
1929
}
1930
1931
return ctx;
1932
}
1933
1934
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
1935
{
1936
/* Adjust the default VMPCK key based on the executing VMPL level */
1937
if (vmpck_id == -1)
1938
vmpck_id = snp_vmpl;
1939
1940
mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
1941
if (!mdesc->vmpck) {
1942
pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
1943
return -EINVAL;
1944
}
1945
1946
/* Verify that VMPCK is not zero. */
1947
if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
1948
pr_err("Empty VMPCK%d communication key\n", vmpck_id);
1949
return -EINVAL;
1950
}
1951
1952
mdesc->vmpck_id = vmpck_id;
1953
1954
mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
1955
if (!mdesc->ctx)
1956
return -ENOMEM;
1957
1958
return 0;
1959
}
1960
EXPORT_SYMBOL_GPL(snp_msg_init);
1961
1962
struct snp_msg_desc *snp_msg_alloc(void)
1963
{
1964
struct snp_msg_desc *mdesc;
1965
void __iomem *mem;
1966
1967
BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);
1968
1969
mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL);
1970
if (!mdesc)
1971
return ERR_PTR(-ENOMEM);
1972
1973
mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
1974
if (!mem)
1975
goto e_free_mdesc;
1976
1977
mdesc->secrets = (__force struct snp_secrets_page *)mem;
1978
1979
/* Allocate the shared page used for the request and response message. */
1980
mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
1981
if (!mdesc->request)
1982
goto e_unmap;
1983
1984
mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
1985
if (!mdesc->response)
1986
goto e_free_request;
1987
1988
return mdesc;
1989
1990
e_free_request:
1991
free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
1992
e_unmap:
1993
iounmap(mem);
1994
e_free_mdesc:
1995
kfree(mdesc);
1996
1997
return ERR_PTR(-ENOMEM);
1998
}
1999
EXPORT_SYMBOL_GPL(snp_msg_alloc);
2000
2001
void snp_msg_free(struct snp_msg_desc *mdesc)
2002
{
2003
if (!mdesc)
2004
return;
2005
2006
kfree(mdesc->ctx);
2007
free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
2008
free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
2009
iounmap((__force void __iomem *)mdesc->secrets);
2010
2011
memset(mdesc, 0, sizeof(*mdesc));
2012
kfree(mdesc);
2013
}
2014
EXPORT_SYMBOL_GPL(snp_msg_free);
2015
2016
/* Mutex to serialize the shared buffer access and command handling. */
2017
static DEFINE_MUTEX(snp_cmd_mutex);
2018
2019
/*
2020
* If an error is received from the host or AMD Secure Processor (ASP) there
2021
* are two options. Either retry the exact same encrypted request or discontinue
2022
* using the VMPCK.
2023
*
2024
* This is because in the current encryption scheme GHCB v2 uses AES-GCM to
2025
* encrypt the requests. The IV for this scheme is the sequence number. GCM
2026
* cannot tolerate IV reuse.
2027
*
2028
* The ASP FW v1.51 only increments the sequence numbers on a successful
2029
* guest<->ASP back and forth and only accepts messages at its exact sequence
2030
* number.
2031
*
2032
* So if the sequence number were to be reused the encryption scheme is
2033
* vulnerable. If the sequence number were incremented for a fresh IV the ASP
2034
* will reject the request.
2035
*/
2036
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
2037
{
2038
pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
2039
mdesc->vmpck_id);
2040
memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
2041
mdesc->vmpck = NULL;
2042
}
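/*
* Once the VMPCK has been zeroed here, the memchr_inv() check in
* snp_send_guest_request() rejects all further requests with -ENOTTY, so a
* single potential IV reuse permanently retires the key.
*/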
2043
2044
static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
2045
{
2046
u64 count;
2047
2048
lockdep_assert_held(&snp_cmd_mutex);
2049
2050
/* Read the current message sequence counter from the secrets page */
2051
count = *mdesc->os_area_msg_seqno;
2052
2053
return count + 1;
2054
}
2055
2056
/* Return a non-zero sequence number on success, zero on overflow */
2057
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
2058
{
2059
u64 count = __snp_get_msg_seqno(mdesc);
2060
2061
/*
2062
* The message sequence counter for the SNP guest request is a 64-bit
2063
* value but version 2 of the GHCB specification defines a 32-bit storage
2064
* for it. If the counter exceeds the 32-bit range then return zero.
2065
* The caller should check the return value, but if the caller happens to
2066
* not check the value and use it, then the firmware treats zero as an
2067
* invalid number and will fail the message request.
2068
*/
2069
if (count >= UINT_MAX) {
2070
pr_err("request message sequence counter overflow\n");
2071
return 0;
2072
}
2073
2074
return count;
2075
}
2076
2077
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
2078
{
2079
/*
2080
* The counter is also incremented by the PSP, so increment it by 2
2081
* and save it in the secrets page.
2082
*/
2083
*mdesc->os_area_msg_seqno += 2;
2084
}
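/*
* Worked example, assuming the stored counter starts at zero: the first
* request goes out with msg_seqno 1, the matching response must carry
* msg_seqno 2 (checked in verify_and_dec_payload()), and after the round
* trip the stored counter is advanced to 2 so the next request uses
* msg_seqno 3.
*/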
2085
2086
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
2087
{
2088
struct snp_guest_msg *resp_msg = &mdesc->secret_response;
2089
struct snp_guest_msg *req_msg = &mdesc->secret_request;
2090
struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
2091
struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
2092
struct aesgcm_ctx *ctx = mdesc->ctx;
2093
u8 iv[GCM_AES_IV_SIZE] = {};
2094
2095
pr_debug("response [seqno %lld type %d version %d sz %d]\n",
2096
resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
2097
resp_msg_hdr->msg_sz);
2098
2099
/* Copy response from shared memory to encrypted memory. */
2100
memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));
2101
2102
/* Verify that the sequence counter is incremented by 1 */
2103
if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
2104
return -EBADMSG;
2105
2106
/* Verify response message type and version number. */
2107
if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
2108
resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
2109
return -EBADMSG;
2110
2111
/*
2112
* If the message size is greater than our buffer length then return
2113
* an error.
2114
*/
2115
if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
2116
return -EBADMSG;
2117
2118
/* Decrypt the payload */
2119
memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
2120
if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
2121
&resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
2122
return -EBADMSG;
2123
2124
return 0;
2125
}
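/*
* Summary of the checks above: the response must carry the request's
* sequence number plus one, the request's message type plus one and the
* same message version, and its payload plus the GCM tag must fit in the
* caller-supplied resp_sz. The IV is rebuilt the same way as on the send
* side: the 64-bit sequence number zero-padded to GCM_AES_IV_SIZE bytes.
*/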
2126
2127
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
2128
{
2129
struct snp_guest_msg *msg = &mdesc->secret_request;
2130
struct snp_guest_msg_hdr *hdr = &msg->hdr;
2131
struct aesgcm_ctx *ctx = mdesc->ctx;
2132
u8 iv[GCM_AES_IV_SIZE] = {};
2133
2134
memset(msg, 0, sizeof(*msg));
2135
2136
hdr->algo = SNP_AEAD_AES_256_GCM;
2137
hdr->hdr_version = MSG_HDR_VER;
2138
hdr->hdr_sz = sizeof(*hdr);
2139
hdr->msg_type = req->msg_type;
2140
hdr->msg_version = req->msg_version;
2141
hdr->msg_seqno = seqno;
2142
hdr->msg_vmpck = req->vmpck_id;
2143
hdr->msg_sz = req->req_sz;
2144
2145
/* Verify the sequence number is non-zero */
2146
if (!hdr->msg_seqno)
2147
return -ENOSR;
2148
2149
pr_debug("request [seqno %lld type %d version %d sz %d]\n",
2150
hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);
2151
2152
if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
2153
return -EBADMSG;
2154
2155
memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
2156
aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
2157
AAD_LEN, iv, hdr->authtag);
2158
2159
return 0;
2160
}
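/*
* Resulting wire format (sketch): the snp_guest_msg header stays in the
* clear but is authenticated as AAD_LEN bytes of additional data starting
* at hdr->algo, the caller's req_buf is encrypted into msg->payload, and
* the GCM tag lands in hdr->authtag.
*/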
2161
2162
static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
2163
{
2164
unsigned long req_start = jiffies;
2165
unsigned int override_npages = 0;
2166
u64 override_err = 0;
2167
int rc;
2168
2169
retry_request:
2170
/*
2171
* Call firmware to process the request. In this function the encrypted
2172
* message enters shared memory with the host. So after this call the
2173
* sequence number must be incremented or the VMPCK must be deleted to
2174
* prevent reuse of the IV.
2175
*/
2176
rc = snp_issue_guest_request(req);
2177
switch (rc) {
2178
case -ENOSPC:
2179
/*
2180
* If the extended guest request fails because the supplied certificate
2181
* data buffer is too small, retry the same
2182
* guest request without the extended data request in
2183
* order to increment the sequence number and thus avoid
2184
* IV reuse.
2185
*/
2186
override_npages = req->input.data_npages;
2187
req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;
2188
2189
/*
2190
* Override the error to inform callers the given extended
2191
* request buffer size was too small and give the caller the
2192
* required buffer size.
2193
*/
2194
override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);
2195
2196
/*
2197
* If this call to the firmware succeeds, the sequence number can
2198
* be incremented allowing for continued use of the VMPCK. If
2199
* there is an error reflected in the return value, this value
2200
* is checked further down and the result will be the deletion
2201
* of the VMPCK and the error code being propagated back to the
2202
* user as an ioctl() return code.
2203
*/
2204
goto retry_request;
2205
2206
/*
2207
* The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
2208
* throttled. Retry in the driver to avoid returning and reusing the
2209
* message sequence number on a different message.
2210
*/
2211
case -EAGAIN:
2212
if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
2213
rc = -ETIMEDOUT;
2214
break;
2215
}
2216
schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
2217
goto retry_request;
2218
}
2219
2220
/*
2221
* Increment the message sequence number. There is no harm in doing
2222
* this now because decryption uses the value stored in the response
2223
* structure and any failure will wipe the VMPCK, preventing further
2224
* use anyway.
2225
*/
2226
snp_inc_msg_seqno(mdesc);
2227
2228
if (override_err) {
2229
req->exitinfo2 = override_err;
2230
2231
/*
2232
* If an extended guest request was issued and the supplied certificate
2233
* buffer was not large enough, a standard guest request was issued to
2234
* prevent IV reuse. If the standard request was successful, return -EIO
2235
* back to the caller as would have originally been returned.
2236
*/
2237
if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
2238
rc = -EIO;
2239
}
2240
2241
if (override_npages)
2242
req->input.data_npages = override_npages;
2243
2244
return rc;
2245
}
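/*
* Retry behaviour above, in short: -ENOSPC downgrades an extended request
* to a plain SVM_VMGEXIT_GUEST_REQUEST purely to consume the sequence
* number safely, then reports SNP_GUEST_VMM_ERR_INVALID_LEN together with
* the required number of certificate pages; -EAGAIN (throttling) is
* retried every SNP_REQ_RETRY_DELAY until SNP_REQ_MAX_RETRY_DURATION has
* elapsed, at which point -ETIMEDOUT is returned.
*/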
2246
2247
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
2248
{
2249
u64 seqno;
2250
int rc;
2251
2252
/*
2253
* enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
2254
* The offload's DMA SG list of data to encrypt has to be in the linear mapping.
2255
*/
2256
if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
2257
pr_warn("AES-GSM buffers must be in linear mapping");
2258
return -EINVAL;
2259
}
2260
2261
guard(mutex)(&snp_cmd_mutex);
2262
2263
/* Verify that the VMPCK is present and not all zeroes */
2264
if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
2265
pr_err_ratelimited("VMPCK is disabled\n");
2266
return -ENOTTY;
2267
}
2268
2269
/* Get the message sequence number and verify that it is non-zero */
2270
seqno = snp_get_msg_seqno(mdesc);
2271
if (!seqno)
2272
return -EIO;
2273
2274
/* Clear the shared response page for the host to populate. */
2275
memset(mdesc->response, 0, sizeof(struct snp_guest_msg));
2276
2277
/* Encrypt the userspace provided payload in mdesc->secret_request. */
2278
rc = enc_payload(mdesc, seqno, req);
2279
if (rc)
2280
return rc;
2281
2282
/*
2283
* Write the fully encrypted request to the shared unencrypted
2284
* request page.
2285
*/
2286
memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));
2287
2288
/* Initialize the input addresses for the guest request */
2289
req->input.req_gpa = __pa(mdesc->request);
2290
req->input.resp_gpa = __pa(mdesc->response);
2291
req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;
2292
2293
rc = __handle_guest_request(mdesc, req);
2294
if (rc) {
2295
if (rc == -EIO &&
2296
req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
2297
return rc;
2298
2299
pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
2300
rc, req->exitinfo2);
2301
2302
snp_disable_vmpck(mdesc);
2303
return rc;
2304
}
2305
2306
rc = verify_and_dec_payload(mdesc, req);
2307
if (rc) {
2308
pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
2309
snp_disable_vmpck(mdesc);
2310
return rc;
2311
}
2312
2313
return 0;
2314
}
2315
EXPORT_SYMBOL_GPL(snp_send_guest_request);
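/*
* Illustrative caller sketch (simplified; snp_get_tsc_info() below is a
* complete in-tree example):
*
*     struct snp_guest_req req = {
*         .msg_version = MSG_HDR_VER,
*         .msg_type    = SNP_MSG_TSC_INFO_REQ,
*         .vmpck_id    = snp_vmpl,
*         .req_buf     = tsc_req,
*         .req_sz      = sizeof(*tsc_req),
*         .resp_buf    = tsc_resp,
*         .resp_sz     = sizeof(*tsc_resp) + AUTHTAG_LEN,
*         .exit_code   = SVM_VMGEXIT_GUEST_REQUEST,
*     };
*
*     rc = snp_send_guest_request(mdesc, &req);
*
* req_buf/resp_buf must live in the linear mapping (e.g. kmalloc()), per
* the virt_addr_valid() check above, and resp_sz must leave room for the
* AUTHTAG_LEN-byte GCM tag.
*/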
2316
2317
static int __init snp_get_tsc_info(void)
2318
{
2319
struct snp_tsc_info_resp *tsc_resp;
2320
struct snp_tsc_info_req *tsc_req;
2321
struct snp_msg_desc *mdesc;
2322
struct snp_guest_req req = {};
2323
int rc = -ENOMEM;
2324
2325
tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL);
2326
if (!tsc_req)
2327
return rc;
2328
2329
/*
2330
* The intermediate response buffer is used while decrypting the
2331
* response payload. Make sure that it has enough space to cover
2332
* the authtag.
2333
*/
2334
tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
2335
if (!tsc_resp)
2336
goto e_free_tsc_req;
2337
2338
mdesc = snp_msg_alloc();
2339
if (IS_ERR_OR_NULL(mdesc))
2340
goto e_free_tsc_resp;
2341
2342
rc = snp_msg_init(mdesc, snp_vmpl);
2343
if (rc)
2344
goto e_free_mdesc;
2345
2346
req.msg_version = MSG_HDR_VER;
2347
req.msg_type = SNP_MSG_TSC_INFO_REQ;
2348
req.vmpck_id = snp_vmpl;
2349
req.req_buf = tsc_req;
2350
req.req_sz = sizeof(*tsc_req);
2351
req.resp_buf = (void *)tsc_resp;
2352
req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
2353
req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;
2354
2355
rc = snp_send_guest_request(mdesc, &req);
2356
if (rc)
2357
goto e_request;
2358
2359
pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
2360
__func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
2361
tsc_resp->tsc_factor);
2362
2363
if (!tsc_resp->status) {
2364
snp_tsc_scale = tsc_resp->tsc_scale;
2365
snp_tsc_offset = tsc_resp->tsc_offset;
2366
} else {
2367
pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
2368
rc = -EIO;
2369
}
2370
2371
e_request:
2372
/* The response buffer contains sensitive data; explicitly clear it. */
2373
memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
2374
e_free_mdesc:
2375
snp_msg_free(mdesc);
2376
e_free_tsc_resp:
2377
kfree(tsc_resp);
2378
e_free_tsc_req:
2379
kfree(tsc_req);
2380
2381
return rc;
2382
}
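/*
* The request/response buffers above are kzalloc()'d rather than placed on
* the stack because snp_send_guest_request() insists on linear-mapping
* addresses; with CONFIG_VMAP_STACK the stack would fail that
* virt_addr_valid() check.
*/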
2383
2384
void __init snp_secure_tsc_prepare(void)
2385
{
2386
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
2387
return;
2388
2389
if (snp_get_tsc_info()) {
2390
pr_alert("Unable to retrieve Secure TSC info from ASP\n");
2391
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
2392
}
2393
2394
pr_debug("SecureTSC enabled");
2395
}
2396
2397
static unsigned long securetsc_get_tsc_khz(void)
2398
{
2399
return snp_tsc_freq_khz;
2400
}
2401
2402
void __init snp_secure_tsc_init(void)
2403
{
2404
struct snp_secrets_page *secrets;
2405
unsigned long tsc_freq_mhz;
2406
void *mem;
2407
2408
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
2409
return;
2410
2411
mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
2412
if (!mem) {
2413
pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
2414
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
2415
}
2416
2417
secrets = (__force struct snp_secrets_page *)mem;
2418
2419
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
2420
rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
2421
2422
/* Extract the guest TSC frequency in MHz from bits [17:0]; the rest is reserved */
2423
tsc_freq_mhz &= GENMASK_ULL(17, 0);
2424
2425
snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
2426
2427
x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
2428
x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
2429
2430
early_memunmap(mem, PAGE_SIZE);
2431
}
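/*
* For illustration: a part reporting 2000 in GUEST_TSC_FREQ bits [17:0]
* advertises a nominal 2 GHz TSC, i.e. 2,000,000 kHz before the
* secrets-page tsc_factor adjustment applied by SNP_SCALE_TSC_FREQ().
* Both calibrate callbacks then simply return snp_tsc_freq_khz, bypassing
* the normal TSC calibration paths.
*/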
2432
2433