GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/mm/pgtable.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <[email protected]>
 */

#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
#include <asm/machine.h>

pgprot_t pgprot_writecombine(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
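/*
 * Invalidate a pte with the IPTE instruction and flush the corresponding
 * TLB entries, either on the local CPU only (IPTE_LOCAL) or on all CPUs
 * (IPTE_GLOBAL). When the TLB guest facility is available, the NODAT and
 * guest-ASCE options are passed along based on mm->context.gmap_asce.
 */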
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (machine_has_tlb_guest()) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (machine_has_tlb_guest()) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}
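/*
 * Invalidate a pte and flush the TLB immediately. The local variant is
 * only used if the mm is attached to no CPU other than the current one.
 */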
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}
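/*
 * Lazy variant: if the mm is only attached to the current CPU, just mark
 * the pte invalid and defer the TLB flush (flush_mm), otherwise do a
 * global IPTE right away.
 */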
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID)));
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}
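/*
 * The PGSTE for a pte lives PTRS_PER_PTE entries behind the pte in the
 * same page table. pgste_get_lock()/pgste_set_unlock() implement a bit
 * spinlock on PGSTE_PCL_BIT to serialize updates of pte and PGSTE.
 */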
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long value = 0;
#ifdef CONFIG_PGSTE
	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);

	do {
		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
	} while (value & PGSTE_PCL_BIT);
	value |= PGSTE_PCL_BIT;
#endif
	return __pgste(value);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	barrier();
	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
#endif
	return pgste;

}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!machine_has_esop()) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
			entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
	}
#endif
	set_pte(ptep, entry);
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste = __pgste(pgste_val(pgste) ^ bits);
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}
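/*
 * Common start/commit helpers for a pte exchange: lock the PGSTE and run
 * pending notifications first, then install the new pte, propagate the
 * storage key and usage state, and unlock the PGSTE again.
 */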
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, new);
	}
	return old;
}
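/*
 * Exchange a pte and return the old value. The "direct" variant flushes
 * the TLB immediately, the "lazy" variant below may defer the flush.
 */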
pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

/*
 * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
 * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
 */
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new)
{
	preempt_disable();
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_rdp(addr, ptep, 0, 0, 1);
	else
		__ptep_rdp(addr, ptep, 0, 0, 0);
	/*
	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
	 * means it is still valid and active, and must not be changed according
	 * to the architecture. But writing a new value that only differs in SW
	 * bits is allowed.
	 */
	set_pte(ptep, new);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}
EXPORT_SYMBOL(ptep_reset_dat_prot);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);
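/*
 * ptep_modify_prot_start/commit: transactional pte update (e.g. for
 * mprotect). start lazily invalidates the pte and caches the PGSTE,
 * commit installs the new pte and releases the PGSTE lock again.
 */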
pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, pte);
	}
}
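/*
 * pmd invalidation helpers, mirroring the pte versions above. If 1M huge
 * pages are exposed to the guest (allow_gmap_hpage_1m), the gmap shadow
 * tables are invalidated as well.
 */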
static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (machine_has_tlb_guest())
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (machine_has_tlb_guest()) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (cpu_has_idte()) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}
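/*
 * pmd_lookup() walks to the pmd for a user address. It returns 0 and
 * sets *pmdp on success, -ENOENT if no pmd table is present, and
 * -EFAULT if there is no VMA or a large PUD is mapped.
 */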
#ifdef CONFIG_PGSTE
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
{
	struct vm_area_struct *vma;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	/* We need a valid VMA, otherwise this is clearly a fault. */
	vma = vma_lookup(mm, addr);
	if (!vma)
		return -EFAULT;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		return -ENOENT;

	p4d = p4d_offset(pgd, addr);
	if (!p4d_present(*p4d))
		return -ENOENT;

	pud = pud_offset(p4d, addr);
	if (!pud_present(*pud))
		return -ENOENT;

	/* Large PUDs are not supported yet. */
	if (pud_leaf(*pud))
		return -EFAULT;

	*pmdp = pmd_offset(pud, addr);
	return 0;
}
#endif

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (machine_has_tlb_guest())
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (machine_has_tlb_guest())
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (cpu_has_idte())
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * reuse _pmd_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	set_pud(pudp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);
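/*
 * THP support: deposit stashes a preallocated pte page table for a later
 * huge-pmd split, withdraw takes it back out. The first two ptes served
 * as list_head storage and are reinitialized to _PAGE_INVALID on
 * withdraw.
 */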
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	set_pte(ptep, __pte(_PAGE_INVALID));
	ptep++;
	set_pte(ptep, __pte(_PAGE_INVALID));
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
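/*
 * Everything below is only built with CONFIG_PGSTE; it keeps the PGSTEs,
 * the real storage keys and the gmap notifier bits in sync for KVM
 * guests.
 */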
#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
		entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
	}
	pgste = set_pgste_bit(pgste, bit);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}
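/*
 * Create a shadow (VSIE) pte at tptep from the parent pte at sptep,
 * taking the page frame from the parent and the protection bit from the
 * guest mapping. Returns 1 if a shadow was created, 0 if it already
 * exists, and -EAGAIN if the parent pte is invalid or would grant less
 * access than requested.
 */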
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
		tpgste = pgste_get_lock(tptep);
		tpte = __pte((pte_val(spte) & PAGE_MASK) |
			     (pte_val(pte) & _PAGE_PROTECT));
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}
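/*
 * Discard the swap entry of a page the guest has marked unused or
 * logically zero; with "reset" the usage state in the PGSTE is cleared
 * instead.
 */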
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct folio *folio = pfn_swap_entry_folio(entry);

		dec_mm_counter(mm, mm_counter(folio));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))
			pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
		else
			pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
		set_pte(ptep, pte);
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
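/*
 * Set the storage key for the guest page at addr: update the real key if
 * the page is mapped and keep the PGSTE copy of ACC/FP and the guest R/C
 * bits in sync. "nq" requests the non-quiescing variant of the key
 * operation.
 */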
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * we can ignore attempts to set the key to 0, because it already is 0.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return key ? -EFAULT : 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return key ? -EFAULT : 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
			      PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
	new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		new = set_pgste_bit(new, bits << 52);
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		new = set_pgste_bit(new, PGSTE_UC_BIT);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/*
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/*
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	unsigned long paddr;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;
	int cc = 0;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0 and there is nothing for us to do.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		cc = page_reset_referenced(paddr);
		spin_unlock(ptl);
		return cc;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	new = clear_pgste_bit(new, PGSTE_GR_BIT);

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		paddr = pte_val(*ptep) & PAGE_MASK;
		cc = page_reset_referenced(paddr);
		/* Merge real referenced bit into host-set */
		new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		new = set_pgste_bit(new, PGSTE_UC_BIT);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);
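/*
 * Read the guest view of the storage key: the real key if the page is
 * mapped, otherwise the ACC/FP copy saved in the PGSTE, with the guest
 * R/C bits from the PGSTE merged in.
 */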
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	unsigned long paddr;
	spinlock_t *ptl;
	pgste_t pgste;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0.
	 */
	*key = 0;

	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		*key = page_get_storage_key(paddr);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	paddr = pte_val(*ptep) & PAGE_MASK;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(paddr);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *         or < 0 in case of error. -EINVAL is returned for invalid values
 *         of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
			unsigned long *oldpte, unsigned long *oldpgste)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste = __pgste(pgstev);
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
			unsigned long bits, unsigned long value)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	new = clear_pgste_bit(new, bits);
	new = set_pgste_bit(new, value & bits);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif