GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/kvm/pv.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <[email protected]>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gaccess.h"
#include "gmap.h"
#include "faultin.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
        lockdep_assert_held(&kvm->lock);
        return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
        lockdep_assert_held(&vcpu->mutex);
        return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * should_export_before_import() - Determine whether an export is needed
 * before an import-like operation.
 * @uvcb: The Ultravisor control block of the UVC to be performed.
 * @mm: The mm of the process.
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * No export is needed also when there is only one protected VM, because the
 * page cannot belong to the wrong VM in that case (there is no "other VM"
 * it can belong to).
 *
 * Return: %true if an export is needed before every import, otherwise %false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
        /*
         * The misc feature indicates, among other things, that importing a
         * shared page from a different protected VM will automatically also
         * transfer its ownership.
         */
        if (uv_has_feature(BIT_UV_FEAT_MISC))
                return false;
        if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
                return false;
        return atomic_read(&mm->context.protected_count) > 1;
}

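/*
 * Context passed to the guest-fault callback while making one guest page
 * secure: the UVCB to apply, the result of the conversion, whether an export
 * is needed beforehand, and the folio that still has to be split, if any.
 */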
struct pv_make_secure {
        void *uvcb;
        struct folio *folio;
        int rc;
        bool needs_export;
};

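/*
 * Export the folio first if required, then try to make it secure with the
 * given UVCB. Hugetlb folios are rejected; other large folios are reported
 * back so the caller can split them and retry.
 */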
static int __kvm_s390_pv_make_secure(struct guest_fault *f, struct folio *folio)
{
        struct pv_make_secure *priv = f->priv;
        int rc;

        if (priv->needs_export)
                uv_convert_from_secure(folio_to_phys(folio));

        if (folio_test_hugetlb(folio))
                return -EFAULT;
        if (folio_test_large(folio))
                return -E2BIG;

        if (!f->page)
                folio_get(folio);
        rc = __make_folio_secure(folio, priv->uvcb);
        if (!f->page)
                folio_put(folio);

        return rc;
}

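/*
 * Fault-in callback: with the page present, lock the folio and attempt the
 * conversion. A reference to the folio is kept if it still needs to be
 * split (-E2BIG) or is busy (-EBUSY), so the caller can deal with it.
 */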
static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
        struct pv_make_secure *priv = f->priv;
        struct folio *folio;

        folio = pfn_folio(f->pfn);
        priv->rc = -EAGAIN;
        if (folio_trylock(folio)) {
                priv->rc = __kvm_s390_pv_make_secure(f, folio);
                if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
                        priv->folio = folio;
                        folio_get(folio);
                }
                folio_unlock(folio);
        }
}

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
        struct pv_make_secure priv = { .uvcb = uvcb };
        struct guest_fault f = {
                .write_attempt = true,
                .gfn = gpa_to_gfn(gaddr),
                .callback = _kvm_s390_pv_make_secure,
                .priv = &priv,
        };
        int rc;

        lockdep_assert_held(&kvm->srcu);

        priv.needs_export = should_export_before_import(uvcb, kvm->mm);

        scoped_guard(mutex, &kvm->arch.pv.import_lock) {
                rc = kvm_s390_faultin_gfn(NULL, kvm, &f);

                if (!rc) {
                        rc = priv.rc;
                        if (priv.folio) {
                                rc = s390_wiggle_split_folio(kvm->mm, priv.folio);
                                if (!rc)
                                        rc = -EAGAIN;
                        }
                }
        }
        if (priv.folio)
                folio_put(priv.folio);
        return rc;
}

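/* Convert one guest page to secure with the Convert To Secure Storage UVC. */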
int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
        struct uv_cb_cts uvcb = {
                .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
                .header.len = sizeof(uvcb),
                .guest_handle = kvm_s390_pv_get_handle(kvm),
                .gaddr = gaddr,
        };

        return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
        struct page *page;
        int rc = 0;

        mmap_read_lock(kvm->mm);
        page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
        if (page)
                rc = __kvm_s390_pv_destroy_page(page);
        kvm_release_page_clean(page);
        mmap_read_unlock(kvm->mm);
        return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
        struct list_head list;
        unsigned long old_gmap_table;
        u64 handle;
        void *stor_var;
        unsigned long stor_base;
};

static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
        kvm->arch.pv.handle = 0;
        kvm->arch.pv.guest_len = 0;
        kvm->arch.pv.stor_base = 0;
        kvm->arch.pv.stor_var = NULL;
}

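/*
 * Destroy the Ultravisor CPU associated with this vCPU and free the memory
 * that was donated for it. A no-op if the vCPU is not protected.
 */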
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
        int cc;

        if (!kvm_s390_pv_cpu_get_handle(vcpu))
                return 0;

        cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

        KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
                     vcpu->vcpu_id, *rc, *rrc);
        WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

        /* Intended memory leak for something that should never happen. */
        if (!cc)
                free_pages(vcpu->arch.pv.stor_base,
                           get_order(uv_info.guest_cpu_stor_len));

        free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
        vcpu->arch.sie_block->pv_handle_cpu = 0;
        vcpu->arch.sie_block->pv_handle_config = 0;
        memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
        vcpu->arch.sie_block->sdf = 0;
        /*
         * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
         * Use the reset value of gbea to avoid leaking the kernel pointer of
         * the just freed sida.
         */
        vcpu->arch.sie_block->gbea = 1;
        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

        return cc ? -EIO : 0;
}

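/*
 * Create the Ultravisor CPU for this vCPU: donate the CPU storage and a
 * SIDA page, issue the Create Secure CPU UVC and wire the returned handle
 * into the SIE control block.
 */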
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
        struct uv_cb_csc uvcb = {
                .header.cmd = UVC_CMD_CREATE_SEC_CPU,
                .header.len = sizeof(uvcb),
        };
        void *sida_addr;
        int cc;

        if (kvm_s390_pv_cpu_get_handle(vcpu))
                return -EINVAL;

        vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
                                                   get_order(uv_info.guest_cpu_stor_len));
        if (!vcpu->arch.pv.stor_base)
                return -ENOMEM;

        /* Input */
        uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
        uvcb.num = vcpu->arch.sie_block->icpua;
        uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
        uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

        /* Alloc Secure Instruction Data Area Designation */
        sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!sida_addr) {
                free_pages(vcpu->arch.pv.stor_base,
                           get_order(uv_info.guest_cpu_stor_len));
                return -ENOMEM;
        }
        vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

        cc = uv_call(0, (u64)&uvcb);
        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
        KVM_UV_EVENT(vcpu->kvm, 3,
                     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
                     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
                     uvcb.header.rrc);

        if (cc) {
                u16 dummy;

                kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
                return -EIO;
        }

        /* Output */
        vcpu->arch.pv.handle = uvcb.cpu_handle;
        vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
        vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
        vcpu->arch.sie_block->sdf = 2;
        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
        vfree(kvm->arch.pv.stor_var);
        free_pages(kvm->arch.pv.stor_base,
                   get_order(uv_info.guest_base_stor_len));
        kvm_s390_clear_pv_state(kvm);
}

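/*
 * Allocate the base and variable storage that will be donated to the
 * Ultravisor for the protected configuration; the variable storage is
 * sized after the current amount of guest memory.
 */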
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
        unsigned long base = uv_info.guest_base_stor_len;
        unsigned long virt = uv_info.guest_virt_var_stor_len;
        unsigned long npages = 0, vlen = 0;

        kvm->arch.pv.stor_var = NULL;
        kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
        if (!kvm->arch.pv.stor_base)
                return -ENOMEM;

        /*
         * Calculate current guest storage for allocation of the
         * variable storage, which is based on the length in MB.
         *
         * Slots are sorted by GFN
         */
        mutex_lock(&kvm->slots_lock);
        npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
        mutex_unlock(&kvm->slots_lock);

        kvm->arch.pv.guest_len = npages * PAGE_SIZE;

        /* Allocate variable storage */
        vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
        vlen += uv_info.guest_virt_base_stor_len;
        kvm->arch.pv.stor_var = vzalloc(vlen);
        if (!kvm->arch.pv.stor_var)
                goto out_err;
        return 0;

out_err:
        kvm_s390_pv_dealloc_vm(kvm);
        return -ENOMEM;
}

/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
                                            struct pv_vm_to_be_destroyed *leftover,
                                            u16 *rc, u16 *rrc)
{
        int cc;

        /* It used the destroy-fast UVC, nothing left to do here */
        if (!leftover->handle)
                goto done_fast;
        cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
        KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
        WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
        if (cc)
                return cc;
        /*
         * Intentionally leak unusable memory. If the UVC fails, the memory
         * used for the VM and its metadata is permanently unusable.
         * This can only happen in case of a serious KVM or hardware bug; it
         * is not expected to happen in normal operation.
         */
        free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
        free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
        vfree(leftover->stor_var);
done_fast:
        atomic_dec(&kvm->mm->context.protected_count);
        return 0;
}

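/*
 * Destroy the protected configuration with the Destroy Secure Configuration
 * Fast UVC and, on success, free the memory that was donated for it.
 */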
static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
        struct uv_cb_destroy_fast uvcb = {
                .header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
                .header.len = sizeof(uvcb),
                .handle = kvm_s390_pv_get_handle(kvm),
        };
        int cc;

        cc = uv_call_sched(0, (u64)&uvcb);
        if (rc)
                *rc = uvcb.header.rc;
        if (rrc)
                *rrc = uvcb.header.rrc;
        KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
                     uvcb.header.rc, uvcb.header.rrc);
        WARN_ONCE(cc && uvcb.header.rc != 0x104,
                  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
                  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
        /* Intended memory leak on "impossible" error */
        if (!cc)
                kvm_s390_pv_dealloc_vm(kvm);
        return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
        return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
        struct pv_vm_to_be_destroyed *priv;
        int res = 0;

        lockdep_assert_held(&kvm->lock);
        /*
         * If another protected VM was already prepared for teardown, refuse.
         * A normal deinitialization has to be performed instead.
         */
        if (kvm->arch.pv.set_aside)
                return -EINVAL;

        /* Guest with segment type ASCE, refuse to destroy asynchronously */
        if (kvm->arch.gmap->asce.dt == TABLE_TYPE_SEGMENT)
                return -EINVAL;

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        if (is_destroy_fast_available()) {
                res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
        } else {
                priv->stor_var = kvm->arch.pv.stor_var;
                priv->stor_base = kvm->arch.pv.stor_base;
                priv->handle = kvm_s390_pv_get_handle(kvm);
                priv->old_gmap_table = (unsigned long)dereference_asce(kvm->arch.gmap->asce);
                if (s390_replace_asce(kvm->arch.gmap))
                        res = -ENOMEM;
        }

        if (res) {
                kfree(priv);
                return res;
        }

        gmap_pv_destroy_range(kvm->arch.gmap, 0, gpa_to_gfn(SZ_2G), false);
        kvm_s390_clear_pv_state(kvm);
        kvm->arch.pv.set_aside = priv;

        *rc = UVC_RC_EXECUTED;
        *rrc = 42;
        return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
        int cc;

        cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
                           UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
        if (!cc) {
                atomic_dec(&kvm->mm->context.protected_count);
                kvm_s390_pv_dealloc_vm(kvm);
        } else {
                /* Intended memory leak on "impossible" error */
                s390_replace_asce(kvm->arch.gmap);
        }
        KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
        WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

        return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
        struct pv_vm_to_be_destroyed *cur;
        bool need_zap = false;
        u16 _rc, _rrc;
        int cc = 0;

        /*
         * Nothing to do if the counter was already 0. Otherwise make sure
         * the counter does not reach 0 before calling s390_uv_destroy_range.
         */
        if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
                return 0;

        *rc = 1;
        /* If the current VM is protected, destroy it */
        if (kvm_s390_pv_get_handle(kvm)) {
                cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
                need_zap = true;
        }

        /* If a previous protected VM was set aside, put it in the need_cleanup list */
        if (kvm->arch.pv.set_aside) {
                list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
                kvm->arch.pv.set_aside = NULL;
        }

        /* Cleanup all protected VMs in the need_cleanup list */
        while (!list_empty(&kvm->arch.pv.need_cleanup)) {
                cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
                need_zap = true;
                if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
                        cc = 1;
                        /*
                         * Only return the first error rc and rrc, so make
                         * sure it is not overwritten. All destroys will
                         * additionally be reported via KVM_UV_EVENT().
                         */
                        if (*rc == UVC_RC_EXECUTED) {
                                *rc = _rc;
                                *rrc = _rrc;
                        }
                }
                list_del(&cur->list);
                kfree(cur);
        }

        /*
         * If the mm still has a mapping, try to mark all its pages as
         * accessible. The counter should not reach zero before this
         * cleanup has been performed.
         */
        if (need_zap && mmget_not_zero(kvm->mm)) {
                gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), false);
                mmput(kvm->mm);
        }

        /* Now the counter can safely reach 0 */
        atomic_dec(&kvm->mm->context.protected_count);
        return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside. Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
        struct pv_vm_to_be_destroyed *p;
        int ret = 0;

        lockdep_assert_not_held(&kvm->lock);
        mutex_lock(&kvm->lock);
        p = kvm->arch.pv.set_aside;
        kvm->arch.pv.set_aside = NULL;
        mutex_unlock(&kvm->lock);
        if (!p)
                return -EINVAL;

        /* When a fatal signal is received, stop immediately */
        if (gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), true))
                goto done;
        if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
                ret = -EIO;
        kfree(p);
        p = NULL;
done:
        /*
         * p is not NULL if we aborted because of a fatal signal, in which
         * case queue the leftover for later cleanup.
         */
        if (p) {
                mutex_lock(&kvm->lock);
                list_add(&p->list, &kvm->arch.pv.need_cleanup);
                mutex_unlock(&kvm->lock);
                /* Did not finish, but pretend things went well */
                *rc = UVC_RC_EXECUTED;
                *rrc = 42;
        }
        return ret;
}

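/*
 * Invoked when the mm of the KVM process goes away: convert the vCPUs back
 * from protected mode and, if the fast UVC is available, destroy the
 * protected configuration; then flag the gmap to export remaining pages
 * on unmap.
 */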
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
                                             struct mm_struct *mm)
{
        struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
        u16 dummy;
        int r;

        /*
         * No locking is needed since this is the last thread of the last user of this
         * struct mm.
         * When the struct kvm gets deinitialized, this notifier is also
         * unregistered. This means that if this notifier runs, then the
         * struct kvm is still valid.
         */
        r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
        if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
                kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
        set_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &kvm->arch.gmap->flags);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
        .release = kvm_s390_pv_mmu_notifier_release,
};

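/*
 * Create the protected configuration for this guest: register the release
 * notifier, donate base and variable storage, split huge pages in the gmap
 * and issue the Create Secure Configuration UVC.
 */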
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
        struct uv_cb_cgc uvcb = {
                .header.cmd = UVC_CMD_CREATE_SEC_CONF,
                .header.len = sizeof(uvcb)
        };
        int cc, ret;
        u16 dummy;

        /* Add the notifier only once. No races because we hold kvm->lock */
        if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
                /* The notifier will be unregistered when the VM is destroyed */
                kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
                ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
                if (ret) {
                        kvm->arch.pv.mmu_notifier.ops = NULL;
                        return ret;
                }
        }

        ret = kvm_s390_pv_alloc_vm(kvm);
        if (ret)
                return ret;

        /* Inputs */
        uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
        uvcb.guest_stor_len = kvm->arch.pv.guest_len;
        uvcb.guest_asce = kvm->arch.gmap->asce.val;
        uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
        uvcb.conf_base_stor_origin =
                virt_to_phys((void *)kvm->arch.pv.stor_base);
        uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
        uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
        uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

        clear_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &kvm->arch.gmap->flags);
        gmap_split_huge_pages(kvm->arch.gmap);

        cc = uv_call_sched(0, (u64)&uvcb);
        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
        KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
                     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

        /* Outputs */
        kvm->arch.pv.handle = uvcb.guest_handle;

        atomic_inc(&kvm->mm->context.protected_count);
        if (cc) {
                if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
                        kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
                } else {
                        atomic_dec(&kvm->mm->context.protected_count);
                        kvm_s390_pv_dealloc_vm(kvm);
                }
                return -EIO;
        }
        return 0;
}

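/*
 * Hand the secure-execution header of the guest image to the Ultravisor
 * via the Set Secure Configuration Parameters UVC.
 */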
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
                              u16 *rrc)
{
        struct uv_cb_ssc uvcb = {
                .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
                .header.len = sizeof(uvcb),
                .sec_header_origin = (u64)hdr,
                .sec_header_len = length,
                .guest_handle = kvm_s390_pv_get_handle(kvm),
        };
        int cc = uv_call(0, (u64)&uvcb);

        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
        KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
                     *rc, *rrc);
        return cc ? -EINVAL : 0;
}

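/*
 * Unpack one page of the guest image at the given address and offset,
 * making it secure in the process. Returns -EAGAIN when the page first had
 * to be faulted in and the call should be retried.
 */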
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
                      u64 offset, u16 *rc, u16 *rrc)
{
        struct uv_cb_unp uvcb = {
                .header.cmd = UVC_CMD_UNPACK_IMG,
                .header.len = sizeof(uvcb),
                .guest_handle = kvm_s390_pv_get_handle(kvm),
                .gaddr = addr,
                .tweak[0] = tweak,
                .tweak[1] = offset,
        };
        int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);

        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;

        if (ret == -ENXIO) {
                ret = kvm_s390_faultin_gfn_simple(NULL, kvm, gpa_to_gfn(addr), true);
                if (!ret)
                        return -EAGAIN;
        }

        if (ret && ret != -EAGAIN)
                KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
                             uvcb.gaddr, *rc, *rrc);
        return ret;
}

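/*
 * Unpack the guest image: walk the given range page by page, retrying on
 * -EAGAIN and stopping on fatal signals or other errors.
 */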
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
                       unsigned long tweak, u16 *rc, u16 *rrc)
{
        u64 offset = 0;
        int ret = 0;

        if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
                return -EINVAL;

        KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
                     addr, size);

        guard(srcu)(&kvm->srcu);

        while (offset < size) {
                ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
                if (ret == -EAGAIN) {
                        cond_resched();
                        if (fatal_signal_pending(current))
                                break;
                        continue;
                }
                if (ret)
                        break;
                addr += PAGE_SIZE;
                offset += PAGE_SIZE;
        }
        if (!ret)
                KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
        return ret;
}

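/* Set the requested state of a protected vCPU via the CPU Set State UVC. */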
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
        struct uv_cb_cpu_set_state uvcb = {
                .header.cmd = UVC_CMD_CPU_SET_STATE,
                .header.len = sizeof(uvcb),
                .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
                .state = state,
        };
        int cc;

        cc = uv_call(0, (u64)&uvcb);
        KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
                     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
        if (cc)
                return -EINVAL;
        return 0;
}

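/* Dump the state of one protected vCPU into the given buffer via the Dump CPU UVC. */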
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
        struct uv_cb_dump_cpu uvcb = {
                .header.cmd = UVC_CMD_DUMP_CPU,
                .header.len = sizeof(uvcb),
                .cpu_handle = vcpu->arch.pv.handle,
                .dump_area_origin = (u64)buff,
        };
        int cc;

        cc = uv_call_sched(0, (u64)&uvcb);
        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
        return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *         is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned. For instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 * 0 on success
 * -ENOMEM if allocating the cache fails
 * -EINVAL if gaddr is not aligned to 1MB
 * -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 * -EINVAL if the UV call fails, rc and rrc will be set in this case
 * -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
                                u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
        struct uv_cb_dump_stor_state uvcb = {
                .header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
                .header.len = sizeof(uvcb),
                .config_handle = kvm->arch.pv.handle,
                .gaddr = *gaddr,
                .dump_area_origin = 0,
        };
        const u64 increment_len = uv_info.conf_dump_storage_state_len;
        size_t buff_kvm_size;
        size_t size_done = 0;
        u8 *buff_kvm = NULL;
        int cc, ret;

        ret = -EINVAL;
        /* UV call processes 1MB guest storage chunks at a time */
        if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
                goto out;

        /*
         * We provide the storage state for 1MB chunks of guest
         * storage. The buffer will need to be aligned to
         * conf_dump_storage_state_len so we don't end on a partial
         * chunk.
         */
        if (!buff_user_len ||
            !IS_ALIGNED(buff_user_len, increment_len))
                goto out;

        /*
         * Allocate a buffer from which we will later copy to the user
         * process. We don't want userspace to dictate our buffer size
         * so we limit it to DUMP_BUFF_LEN.
         */
        ret = -ENOMEM;
        buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
        buff_kvm = vzalloc(buff_kvm_size);
        if (!buff_kvm)
                goto out;

        ret = 0;
        uvcb.dump_area_origin = (u64)buff_kvm;
        /* We will loop until the user buffer is filled or an error occurs */
        do {
                /* Get 1MB worth of guest storage state data */
                cc = uv_call_sched(0, (u64)&uvcb);

                /* All or nothing */
                if (cc) {
                        ret = -EINVAL;
                        break;
                }

                size_done += increment_len;
                uvcb.dump_area_origin += increment_len;
                buff_user_len -= increment_len;
                uvcb.gaddr += HPAGE_SIZE;

                /* KVM Buffer full, time to copy to the process */
                if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
                        if (copy_to_user(buff_user, buff_kvm, size_done)) {
                                ret = -EFAULT;
                                break;
                        }

                        buff_user += size_done;
                        size_done = 0;
                        uvcb.dump_area_origin = (u64)buff_kvm;
                }
        } while (buff_user_len);

        /* Report back where we ended dumping */
        *gaddr = uvcb.gaddr;

        /* Let's only log errors, we don't want to spam */
out:
        if (ret)
                KVM_UV_EVENT(kvm, 3,
                             "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
                             uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
        vfree(buff_kvm);

        return ret;
}

/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 * 0 on success
 * -ENOMEM if allocating the completion buffer fails
 * -EINVAL if the UV call fails, rc and rrc will be set in this case
 * -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
                              u16 *rc, u16 *rrc)
{
        struct uv_cb_dump_complete complete = {
                .header.len = sizeof(complete),
                .header.cmd = UVC_CMD_DUMP_COMPLETE,
                .config_handle = kvm_s390_pv_get_handle(kvm),
        };
        u64 *compl_data;
        int ret;

        /* Allocate dump area */
        compl_data = vzalloc(uv_info.conf_dump_finalize_len);
        if (!compl_data)
                return -ENOMEM;
        complete.dump_area_origin = (u64)compl_data;

        ret = uv_call_sched(0, (u64)&complete);
        *rc = complete.header.rc;
        *rrc = complete.header.rrc;
        KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
                     complete.header.rc, complete.header.rrc);

        if (!ret) {
                /*
                 * kvm_s390_pv_dealloc_vm() will also (mem)set
                 * this to false on a reboot or other destroy
                 * operation for this vm.
                 */
                kvm->arch.pv.dumping = false;
                kvm_s390_vcpu_unblock_all(kvm);
                ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
                if (ret)
                        ret = -EFAULT;
        }
        vfree(compl_data);
        /* If the UVC returned an error, translate it to -EINVAL */
        if (ret > 0)
                ret = -EINVAL;
        return ret;
}