CoCalc -- sev-shared.c

GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/boot/startup/sev-shared.c
29269 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
 * AMD Encrypted Register State Support
4
 *
5
 * Author: Joerg Roedel <jroedel@suse.de>
6
 *
7
 * This file is not compiled stand-alone. It contains code shared
8
 * between the pre-decompression boot code and the running Linux kernel
9
 * and is included directly into both code-bases.
10
 */
11

12
#include <asm/setup_data.h>
13

14
/*
 * Build-context glue: the decompressor build replaces WARN() with a stub
 * that merely evaluates the condition, while every other build maps
 * has_cpuflag() onto boot_cpu_has().
 */
#ifdef __BOOT_COMPRESSED
#undef WARN
#define WARN(condition, format...) (!!(condition))
#else
#define has_cpuflag(f)			boot_cpu_has(f)
#endif
20

21
/* Copy of the SNP firmware's CPUID page. */
22
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
23

24
/*
25
 * These will be initialized based on CPUID table so that non-present
26
 * all-zero leaves (for sparse tables) can be differentiated from
27
 * invalid/out-of-range leaves. This is needed since all-zero leaves
28
 * still need to be post-processed.
29
 */
30
static u32 cpuid_std_range_max __ro_after_init;
31
static u32 cpuid_hyp_range_max __ro_after_init;
32
static u32 cpuid_ext_range_max __ro_after_init;
33

34
bool sev_snp_needs_sfw;
35

36
void __noreturn
37
sev_es_terminate(unsigned int set, unsigned int reason)
38
{
39
	u64 val = GHCB_MSR_TERM_REQ;
40

41
	/* Tell the hypervisor what went wrong. */
42
	val |= GHCB_SEV_TERM_REASON(set, reason);
43

44
	/* Request Guest Termination from Hypervisor */
45
	sev_es_wr_ghcb_msr(val);
46
	VMGEXIT();
47

48
	while (true)
49
		asm volatile("hlt\n" : : : "memory");
50
}
51

52
/*
53
 * The hypervisor features are available from GHCB version 2 onward.
54
 */
55
u64 __init get_hv_features(void)
56
{
57
	u64 val;
58

59
	if (ghcb_version < 2)
60
		return 0;
61

62
	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
63
	VMGEXIT();
64

65
	val = sev_es_rd_ghcb_msr();
66
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
67
		return 0;
68

69
	return GHCB_MSR_HV_FT_RESP_VAL(val);
70
}
71

72
int svsm_process_result_codes(struct svsm_call *call)
73
{
74
	switch (call->rax_out) {
75
	case SVSM_SUCCESS:
76
		return 0;
77
	case SVSM_ERR_INCOMPLETE:
78
	case SVSM_ERR_BUSY:
79
		return -EAGAIN;
80
	default:
81
		return -EINVAL;
82
	}
83
}
84

85
/*
86
 * Issue a VMGEXIT to call the SVSM:
87
 *   - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
88
 *   - Set the CA call pending field to 1
89
 *   - Issue VMGEXIT
90
 *   - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
91
 *   - Perform atomic exchange of the CA call pending field
92
 *
93
 *   - See the "Secure VM Service Module for SEV-SNP Guests" specification for
94
 *     details on the calling convention.
95
 *     - The calling convention loosely follows the Microsoft X64 calling
96
 *       convention by putting arguments in RCX, RDX, R8 and R9.
97
 *     - RAX specifies the SVSM protocol/callid as input and the return code
98
 *       as output.
99
 */
100
void svsm_issue_call(struct svsm_call *call, u8 *pending)
101
{
102
	register unsigned long rax asm("rax") = call->rax;
103
	register unsigned long rcx asm("rcx") = call->rcx;
104
	register unsigned long rdx asm("rdx") = call->rdx;
105
	register unsigned long r8  asm("r8")  = call->r8;
106
	register unsigned long r9  asm("r9")  = call->r9;
107

108
	call->caa->call_pending = 1;
109

110
	asm volatile("rep; vmmcall\n\t"
111
		     : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9)
112
		     : : "memory");
113

114
	*pending = xchg(&call->caa->call_pending, *pending);
115

116
	call->rax_out = rax;
117
	call->rcx_out = rcx;
118
	call->rdx_out = rdx;
119
	call->r8_out  = r8;
120
	call->r9_out  = r9;
121
}
122

123
int svsm_perform_msr_protocol(struct svsm_call *call)
124
{
125
	u8 pending = 0;
126
	u64 val, resp;
127

128
	/*
129
	 * When using the MSR protocol, be sure to save and restore
130
	 * the current MSR value.
131
	 */
132
	val = sev_es_rd_ghcb_msr();
133

134
	sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));
135

136
	svsm_issue_call(call, &pending);
137

138
	resp = sev_es_rd_ghcb_msr();
139

140
	sev_es_wr_ghcb_msr(val);
141

142
	if (pending)
143
		return -EINVAL;
144

145
	if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
146
		return -EINVAL;
147

148
	if (GHCB_MSR_VMPL_RESP_VAL(resp))
149
		return -EINVAL;
150

151
	return svsm_process_result_codes(call);
152
}
153

154
/*
 * Fetch one CPUID register for function @fn from the hypervisor using the
 * GHCB MSR protocol. @reg_idx selects the register; on success the upper
 * 32 bits of the response are stored in *@reg and 0 is returned. An
 * unexpected response code yields -EIO and leaves *@reg untouched.
 */
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 resp;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();

	resp = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(resp) == GHCB_MSR_CPUID_RESP) {
		*reg = (resp >> 32);
		return 0;
	}

	return -EIO;
}
168

169
static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
170
{
171
	int ret;
172

173
	/*
174
	 * MSR protocol does not support fetching non-zero subfunctions, but is
175
	 * sufficient to handle current early-boot cases. Should that change,
176
	 * make sure to report an error rather than ignoring the index and
177
	 * grabbing random values. If this issue arises in the future, handling
178
	 * can be added here to use GHCB-page protocol for cases that occur late
179
	 * enough in boot that GHCB page is available.
180
	 */
181
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
182
		return -EINVAL;
183

184
	ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
185
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
186
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
187
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);
188

189
	return ret;
190
}
191

192

193

194
/*
195
 * This may be called early while still running on the initial identity
196
 * mapping. Use RIP-relative addressing to obtain the correct address
197
 * while running with the initial identity mapping as well as the
198
 * switch-over to kernel virtual addresses later.
199
 */
200
const struct snp_cpuid_table *snp_cpuid_get_table(void)
201
{
202
	return rip_rel_ptr(&cpuid_table_copy);
203
}
204

205
/*
206
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
207
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
208
 * and 1 based on the corresponding features enabled by a particular
209
 * combination of XCR0 and XSS registers so that a guest can look up the
210
 * version corresponding to the features currently enabled in its XCR0/XSS
211
 * registers. The only values that differ between these versions/table
212
 * entries is the enabled XSAVE area size advertised via EBX.
213
 *
214
 * While hypervisors may choose to make use of this support, it is more
215
 * robust/secure for a guest to simply find the entry corresponding to the
216
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
217
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
218
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
219
 *
220
 * Since base/legacy XSAVE area size is documented as 0x240, use that value
221
 * directly rather than relying on the base size in the CPUID table.
222
 *
223
 * Return: XSAVE area size on success, 0 otherwise.
224
 */
225
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
226
{
227
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
228
	u64 xfeatures_found = 0;
229
	u32 xsave_size = 0x240;
230
	int i;
231

232
	for (i = 0; i < cpuid_table->count; i++) {
233
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
234

235
		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
236
			continue;
237
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
238
			continue;
239
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
240
			continue;
241

242
		xfeatures_found |= (BIT_ULL(e->ecx_in));
243

244
		if (compacted)
245
			xsave_size += e->eax;
246
		else
247
			xsave_size = max(xsave_size, e->eax + e->ebx);
248
	}
249

250
	/*
251
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
252
	 * entries in the CPUID table were not present. This is not a valid
253
	 * state to be in.
254
	 */
255
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
256
		return 0;
257

258
	return xsave_size;
259
}
260

261
static bool
262
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
263
{
264
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
265
	int i;
266

267
	for (i = 0; i < cpuid_table->count; i++) {
268
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
269

270
		if (e->eax_in != leaf->fn)
271
			continue;
272

273
		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
274
			continue;
275

276
		/*
277
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
278
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
279
		 * See the comments above snp_cpuid_calc_xsave_size() for more
280
		 * details.
281
		 */
282
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
283
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
284
				continue;
285

286
		leaf->eax = e->eax;
287
		leaf->ebx = e->ebx;
288
		leaf->ecx = e->ecx;
289
		leaf->edx = e->edx;
290

291
		return true;
292
	}
293

294
	return false;
295
}
296

297
static void snp_cpuid_hv_msr(void *ctx, struct cpuid_leaf *leaf)
298
{
299
	if (__sev_cpuid_hv_msr(leaf))
300
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
301
}
302

303
static int
304
snp_cpuid_postprocess(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
305
		      void *ctx, struct cpuid_leaf *leaf)
306
{
307
	struct cpuid_leaf leaf_hv = *leaf;
308

309
	switch (leaf->fn) {
310
	case 0x1:
311
		cpuid_fn(ctx, &leaf_hv);
312

313
		/* initial APIC ID */
314
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
315
		/* APIC enabled bit */
316
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));
317

318
		/* OSXSAVE enabled bit */
319
		if (native_read_cr4() & X86_CR4_OSXSAVE)
320
			leaf->ecx |= BIT(27);
321
		break;
322
	case 0x7:
323
		/* OSPKE enabled bit */
324
		leaf->ecx &= ~BIT(4);
325
		if (native_read_cr4() & X86_CR4_PKE)
326
			leaf->ecx |= BIT(4);
327
		break;
328
	case 0xB:
329
		leaf_hv.subfn = 0;
330
		cpuid_fn(ctx, &leaf_hv);
331

332
		/* extended APIC ID */
333
		leaf->edx = leaf_hv.edx;
334
		break;
335
	case 0xD: {
336
		bool compacted = false;
337
		u64 xcr0 = 1, xss = 0;
338
		u32 xsave_size;
339

340
		if (leaf->subfn != 0 && leaf->subfn != 1)
341
			return 0;
342

343
		if (native_read_cr4() & X86_CR4_OSXSAVE)
344
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
345
		if (leaf->subfn == 1) {
346
			/* Get XSS value if XSAVES is enabled. */
347
			if (leaf->eax & BIT(3)) {
348
				unsigned long lo, hi;
349

350
				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
351
						     : "c" (MSR_IA32_XSS));
352
				xss = (hi << 32) | lo;
353
			}
354

355
			/*
356
			 * The PPR and APM aren't clear on what size should be
357
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
358
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
359
			 * bit 3) since SNP-capable hardware has these feature
360
			 * bits fixed as 1. KVM sets it to 0 in this case, but
361
			 * to avoid this becoming an issue it's safer to simply
362
			 * treat this as unsupported for SNP guests.
363
			 */
364
			if (!(leaf->eax & (BIT(1) | BIT(3))))
365
				return -EINVAL;
366

367
			compacted = true;
368
		}
369

370
		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
371
		if (!xsave_size)
372
			return -EINVAL;
373

374
		leaf->ebx = xsave_size;
375
		}
376
		break;
377
	case 0x8000001E:
378
		cpuid_fn(ctx, &leaf_hv);
379

380
		/* extended APIC ID */
381
		leaf->eax = leaf_hv.eax;
382
		/* compute ID */
383
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
384
		/* node ID */
385
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
386
		break;
387
	default:
388
		/* No fix-ups needed, use values as-is. */
389
		break;
390
	}
391

392
	return 0;
393
}
394

395
/*
396
 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
397
 * should be treated as fatal by caller.
398
 */
399
int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
400
	      void *ctx, struct cpuid_leaf *leaf)
401
{
402
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
403

404
	if (!cpuid_table->count)
405
		return -EOPNOTSUPP;
406

407
	if (!snp_cpuid_get_validated_func(leaf)) {
408
		/*
409
		 * Some hypervisors will avoid keeping track of CPUID entries
410
		 * where all values are zero, since they can be handled the
411
		 * same as out-of-range values (all-zero). This is useful here
412
		 * as well as it allows virtually all guest configurations to
413
		 * work using a single SNP CPUID table.
414
		 *
415
		 * To allow for this, there is a need to distinguish between
416
		 * out-of-range entries and in-range zero entries, since the
417
		 * CPUID table entries are only a template that may need to be
418
		 * augmented with additional values for things like
419
		 * CPU-specific information during post-processing. So if it's
420
		 * not in the table, set the values to zero. Then, if they are
421
		 * within a valid CPUID range, proceed with post-processing
422
		 * using zeros as the initial values. Otherwise, skip
423
		 * post-processing and just return zeros immediately.
424
		 */
425
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
426

427
		/* Skip post-processing for out-of-range zero leafs. */
428
		if (!(leaf->fn <= cpuid_std_range_max ||
429
		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
430
		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
431
			return 0;
432
	}
433

434
	return snp_cpuid_postprocess(cpuid_fn, ctx, leaf);
435
}
436

437
/*
438
 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
439
 * page yet, so it only supports the MSR based communication with the
440
 * hypervisor and only the CPUID exit-code.
441
 */
442
void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
443
{
444
	unsigned int subfn = lower_bits(regs->cx, 32);
445
	unsigned int fn = lower_bits(regs->ax, 32);
446
	u16 opcode = *(unsigned short *)regs->ip;
447
	struct cpuid_leaf leaf;
448
	int ret;
449

450
	/* Only CPUID is supported via MSR protocol */
451
	if (exit_code != SVM_EXIT_CPUID)
452
		goto fail;
453

454
	/* Is it really a CPUID insn? */
455
	if (opcode != 0xa20f)
456
		goto fail;
457

458
	leaf.fn = fn;
459
	leaf.subfn = subfn;
460

461
	/*
462
	 * If SNP is active, then snp_cpuid() uses the CPUID table to obtain the
463
	 * CPUID values (with possible HV interaction during post-processing of
464
	 * the values). But if SNP is not active (no CPUID table present), then
465
	 * snp_cpuid() returns -EOPNOTSUPP so that an SEV-ES guest can call the
466
	 * HV to obtain the CPUID information.
467
	 */
468
	ret = snp_cpuid(snp_cpuid_hv_msr, NULL, &leaf);
469
	if (!ret)
470
		goto cpuid_done;
471

472
	if (ret != -EOPNOTSUPP)
473
		goto fail;
474

475
	/*
476
	 * This is reached by a SEV-ES guest and needs to invoke the HV for
477
	 * the CPUID data.
478
	 */
479
	if (__sev_cpuid_hv_msr(&leaf))
480
		goto fail;
481

482
cpuid_done:
483
	regs->ax = leaf.eax;
484
	regs->bx = leaf.ebx;
485
	regs->cx = leaf.ecx;
486
	regs->dx = leaf.edx;
487

488
	/*
489
	 * This is a VC handler and the #VC is only raised when SEV-ES is
490
	 * active, which means SEV must be active too. Do sanity checks on the
491
	 * CPUID results to make sure the hypervisor does not trick the kernel
492
	 * into the no-sev path. This could map sensitive data unencrypted and
493
	 * make it accessible to the hypervisor.
494
	 *
495
	 * In particular, check for:
496
	 *	- Availability of CPUID leaf 0x8000001f
497
	 *	- SEV CPUID bit.
498
	 *
499
	 * The hypervisor might still report the wrong C-bit position, but this
500
	 * can't be checked here.
501
	 */
502

503
	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
504
		/* SEV leaf check */
505
		goto fail;
506
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
507
		/* SEV bit */
508
		goto fail;
509

510
	/* Skip over the CPUID two-byte opcode */
511
	regs->ip += 2;
512

513
	return;
514

515
fail:
516
	/* Terminate the guest */
517
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
518
}
519

520
struct cc_setup_data {
521
	struct setup_data header;
522
	u32 cc_blob_address;
523
};
524

525
/*
526
 * Search for a Confidential Computing blob passed in as a setup_data entry
527
 * via the Linux Boot Protocol.
528
 */
529
static __init
530
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
531
{
532
	struct cc_setup_data *sd = NULL;
533
	struct setup_data *hdr;
534

535
	hdr = (struct setup_data *)bp->hdr.setup_data;
536

537
	while (hdr) {
538
		if (hdr->type == SETUP_CC_BLOB) {
539
			sd = (struct cc_setup_data *)hdr;
540
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
541
		}
542
		hdr = (struct setup_data *)hdr->next;
543
	}
544

545
	return NULL;
546
}
547

548
/*
549
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
550
 * pointer that will be used to access it.
551
 *
552
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
553
 * be possible as an alternative, but the approach is brittle since the
554
 * mapping needs to be updated in sync with all the changes to virtual memory
555
 * layout and related mapping facilities throughout the boot process.
556
 */
557
static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
558
{
559
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
560
	int i;
561

562
	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
563
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
564

565
	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
566
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
567
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
568

569
	cpuid_table = snp_cpuid_get_table();
570
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));
571

572
	/* Initialize CPUID ranges for range-checking. */
573
	for (i = 0; i < cpuid_table->count; i++) {
574
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
575

576
		if (fn->eax_in == 0x0)
577
			cpuid_std_range_max = fn->eax;
578
		else if (fn->eax_in == 0x40000000)
579
			cpuid_hyp_range_max = fn->eax;
580
		else if (fn->eax_in == 0x80000000)
581
			cpuid_ext_range_max = fn->eax;
582
	}
583
}
584

585
static int svsm_call_msr_protocol(struct svsm_call *call)
586
{
587
	int ret;
588

589
	do {
590
		ret = svsm_perform_msr_protocol(call);
591
	} while (ret == -EAGAIN);
592

593
	return ret;
594
}
595

596
/*
 * Ask the SVSM to validate (@validate true) or invalidate (@validate
 * false) the 4K page at physical address @paddr.
 *
 * The request is built in the svsm_buffer of the calling area @caa, whose
 * physical address is @caa_pa. Interrupts are disabled around the call,
 * and the guest is terminated if the call fails.
 */
static void svsm_pval_4k_page(unsigned long paddr, bool validate,
			      struct svsm_ca *caa, u64 caa_pa)
{
	struct svsm_call call = { .caa = caa };
	struct svsm_pvalidate_call *req;
	unsigned long irq_flags;
	u64 req_pa;

	/*
	 * This may run very early in boot, so stick to the native helpers
	 * and stay clear of paravirt issues.
	 */
	irq_flags = native_local_irq_save();

	/* The request lives in the calling area's buffer. */
	req = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
	req_pa = caa_pa + offsetof(struct svsm_ca, svsm_buffer);

	/* A single 4K entry describing @paddr. */
	req->num_entries = 1;
	req->cur_index   = 0;
	req->entry[0].page_size = RMP_PG_SIZE_4K;
	req->entry[0].action    = validate;
	req->entry[0].ignore_cf = 0;
	req->entry[0].rsvd      = 0;
	req->entry[0].pfn       = paddr >> PAGE_SHIFT;

	/* Protocol 0, Call ID 1 */
	call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
	call.rcx = req_pa;

	/*
	 * Only the MSR protocol is used, which keeps this code usable from
	 * startup code where VA/PA translations of the GHCB page's address
	 * may be problematic.
	 */
	if (svsm_call_msr_protocol(&call))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);

	native_local_irq_restore(irq_flags);
}
637

638
static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
639
			      bool validate, struct svsm_ca *caa, u64 caa_pa)
640
{
641
	int ret;
642

643
	if (snp_vmpl) {
644
		svsm_pval_4k_page(paddr, validate, caa, caa_pa);
645
	} else {
646
		ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
647
		if (ret)
648
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
649
	}
650

651
	/*
652
	 * If validating memory (making it private) and affected by the
653
	 * cache-coherency vulnerability, perform the cache eviction mitigation.
654
	 */
655
	if (validate && sev_snp_needs_sfw)
656
		sev_evict_cache((void *)vaddr, 1);
657
}
658

659
static void __page_state_change(unsigned long vaddr, unsigned long paddr,
660
			        const struct psc_desc *desc)
661
{
662
	u64 val, msr;
663

664
	/*
665
	 * If private -> shared then invalidate the page before requesting the
666
	 * state change in the RMP table.
667
	 */
668
	if (desc->op == SNP_PAGE_STATE_SHARED)
669
		pvalidate_4k_page(vaddr, paddr, false, desc->ca, desc->caa_pa);
670

671
	/* Save the current GHCB MSR value */
672
	msr = sev_es_rd_ghcb_msr();
673

674
	/* Issue VMGEXIT to change the page state in RMP table. */
675
	sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, desc->op));
676
	VMGEXIT();
677

678
	/* Read the response of the VMGEXIT. */
679
	val = sev_es_rd_ghcb_msr();
680
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
681
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
682

683
	/* Restore the GHCB MSR value */
684
	sev_es_wr_ghcb_msr(msr);
685

686
	/*
687
	 * Now that page state is changed in the RMP table, validate it so that it is
688
	 * consistent with the RMP entry.
689
	 */
690
	if (desc->op == SNP_PAGE_STATE_PRIVATE)
691
		pvalidate_4k_page(vaddr, paddr, true, desc->ca, desc->caa_pa);
692
}
693

694
/*
695
 * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
696
 * services needed when not running in VMPL0.
697
 */
698
static bool __init svsm_setup_ca(const struct cc_blob_sev_info *cc_info,
699
				 void *page)
700
{
701
	struct snp_secrets_page *secrets_page;
702
	struct snp_cpuid_table *cpuid_table;
703
	unsigned int i;
704
	u64 caa;
705

706
	BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE);
707

708
	/*
709
	 * Check if running at VMPL0.
710
	 *
711
	 * Use RMPADJUST (see the rmpadjust() function for a description of what
712
	 * the instruction does) to update the VMPL1 permissions of a page. If
713
	 * the guest is running at VMPL0, this will succeed and implies there is
714
	 * no SVSM. If the guest is running at any other VMPL, this will fail.
715
	 * Linux SNP guests only ever run at a single VMPL level so permission mask
716
	 * changes of a lesser-privileged VMPL are a don't-care.
717
	 *
718
	 * Use a rip-relative reference to obtain the proper address, since this
719
	 * routine is running identity mapped when called, both by the decompressor
720
	 * code and the early kernel code.
721
	 */
722
	if (!rmpadjust((unsigned long)page, RMP_PG_SIZE_4K, 1))
723
		return false;
724

725
	/*
726
	 * Not running at VMPL0, ensure everything has been properly supplied
727
	 * for running under an SVSM.
728
	 */
729
	if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE)
730
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE);
731

732
	secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys;
733
	if (!secrets_page->svsm_size)
734
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM);
735

736
	if (!secrets_page->svsm_guest_vmpl)
737
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);
738

739
	snp_vmpl = secrets_page->svsm_guest_vmpl;
740

741
	caa = secrets_page->svsm_caa;
742

743
	/*
744
	 * An open-coded PAGE_ALIGNED() in order to avoid including
745
	 * kernel-proper headers into the decompressor.
746
	 */
747
	if (caa & (PAGE_SIZE - 1))
748
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);
749

750
	boot_svsm_caa_pa = caa;
751

752
	/* Advertise the SVSM presence via CPUID. */
753
	cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table();
754
	for (i = 0; i < cpuid_table->count; i++) {
755
		struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
756

757
		if (fn->eax_in == 0x8000001f)
758
			fn->eax |= BIT(28);
759
	}
760

761
	return true;
762
}
763

764
Product

Resources

Company