// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2024 Google LLC.

//! Virtual memory.
//!
//! This module deals with managing a single VMA in the address space of a userspace process. Each
//! VMA corresponds to a region of memory that the userspace process can access, and the VMA lets
//! you control what happens when userspace reads or writes to that region of memory.
//!
//! The module has several different Rust types that all correspond to the C type called
//! `vm_area_struct`. The different structs represent what kind of access you have to the VMA, e.g.
//! [`VmaRef`] is used when you hold the mmap or vma read lock. Using the appropriate struct
//! ensures that you can't, for example, accidentally call a function that requires holding the
//! write lock when you only hold the read lock.
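//!
//! # Examples
//!
//! A rough sketch (hypothetical driver code, not part of this module) of how these types fit
//! together in an `f_ops->mmap()` hook: the hook receives a [`VmaNew`], configures it, and can
//! then use the returned [`VmaMixedMap`] to insert pages.
//!
//! ```
//! use kernel::mm::virt::VmaNew;
//! use kernel::page::Page;
//! use kernel::prelude::*;
//!
//! fn setup_vma(vma: &VmaNew, page: &Page) -> Result {
//!     // Reject writable mappings; `mprotect()` can then never add write access either.
//!     vma.try_clear_maywrite()?;
//!     // `VM_MIXEDMAP` allows `vm_insert_page()` to be used on this vma.
//!     let vma = vma.set_mixedmap();
//!     // Map `page` at the start of the area; this does not take ownership of the page.
//!     vma.vm_insert_page(vma.start(), page)
//! }
//! ```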

use crate::{
    bindings,
    error::{code::EINVAL, to_result, Result},
    mm::MmWithUser,
    page::Page,
    types::Opaque,
};

use core::ops::Deref;

/// A wrapper for the kernel's `struct vm_area_struct` with read access.
///
/// It represents an area of virtual memory.
///
/// # Invariants
///
/// The caller must hold the mmap read lock or the vma read lock.
#[repr(transparent)]
pub struct VmaRef {
    vma: Opaque<bindings::vm_area_struct>,
}

// Methods you can call when holding the mmap or vma read lock (or stronger). They must be usable
// no matter what the vma flags are.
impl VmaRef {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap or vma
    /// read lock (or stronger) is held for at least the duration of 'a.
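    ///
    /// # Examples
    ///
    /// A sketch (hypothetical C-callback glue, not compiled here) of wrapping a raw vma pointer
    /// received from C code while the mmap read lock is known to be held:
    ///
    /// ```ignore
    /// use kernel::bindings;
    /// use kernel::mm::virt::VmaRef;
    ///
    /// unsafe fn vma_size(vma: *const bindings::vm_area_struct) -> usize {
    ///     // SAFETY: Our caller guarantees that `vma` is valid and that the mmap read lock is
    ///     // held for the duration of this call.
    ///     let vma = unsafe { VmaRef::from_raw(vma) };
    ///     vma.end() - vma.start()
    /// }
    /// ```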
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Returns a raw pointer to this area.
    #[inline]
    pub fn as_ptr(&self) -> *mut bindings::vm_area_struct {
        self.vma.get()
    }

    /// Access the underlying `mm_struct`.
    #[inline]
    pub fn mm(&self) -> &MmWithUser {
        // SAFETY: By the type invariants, this `vm_area_struct` is valid and we hold the mmap/vma
        // read lock or stronger. This implies that the underlying mm has a non-zero value of
        // `mm_users`.
        unsafe { MmWithUser::from_raw((*self.as_ptr()).vm_mm) }
    }

    /// Returns the flags associated with the virtual memory area.
    ///
    /// The possible flags are a combination of the constants in [`flags`].
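    ///
    /// # Examples
    ///
    /// A minimal sketch (hypothetical helper) of testing for a shared mapping, assuming a valid
    /// `VmaRef` is already in hand:
    ///
    /// ```
    /// use kernel::mm::virt::{flags, VmaRef};
    ///
    /// fn is_shared(vma: &VmaRef) -> bool {
    ///     // The flags are a bitmask, so combinations can be tested with bitwise AND.
    ///     (vma.flags() & flags::SHARED) != 0
    /// }
    /// ```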
    #[inline]
    pub fn flags(&self) -> vm_flags_t {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_2.vm_flags }
    }

    /// Returns the (inclusive) start address of the virtual memory area.
    #[inline]
    pub fn start(&self) -> usize {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_start }
    }

    /// Returns the (exclusive) end address of the virtual memory area.
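    ///
    /// # Examples
    ///
    /// Because `start` is inclusive and `end` is exclusive, the two together give the size of
    /// the area (a minimal sketch, assuming a valid `VmaRef`):
    ///
    /// ```
    /// use kernel::mm::virt::VmaRef;
    ///
    /// fn vma_len(vma: &VmaRef) -> usize {
    ///     // Half-open range [start, end), so the difference is the length in bytes.
    ///     vma.end() - vma.start()
    /// }
    /// ```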
    #[inline]
    pub fn end(&self) -> usize {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_end }
    }

    /// Zap pages in the given page range.
    ///
    /// This clears page table mappings for the range at the leaf level, leaving all other page
    /// tables intact, and freeing any memory referenced by the VMA in this range. That is,
    /// anonymous memory is completely freed, and file-backed memory has its reference count on
    /// page cache folios dropped; any dirty data will still be written back to disk as usual.
    ///
    /// It may seem odd that we clear only at the leaf level; this is, however, a product of the
    /// page table structure used to map physical memory into a virtual address space: each
    /// virtual address actually consists of a series of array indices into page tables, which
    /// form a hierarchical page table structure.
    ///
    /// As a result, each page table level maps a multiple of the page table levels below it, and
    /// thus spans an ever-increasing range of pages. At the leaf or PTE level, we map the actual
    /// physical memory.
    ///
    /// It is here that a zap operates, as it is the only level where we can be certain of
    /// clearing without impacting any other virtual mappings. It is an implementation detail as
    /// to whether the kernel goes further in freeing unused page tables, but for the purposes of
    /// this operation we must only assume that the leaf level is cleared.
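    ///
    /// # Examples
    ///
    /// A minimal sketch (hypothetical helper, not part of this file) that zaps every page in the
    /// vma; out-of-range requests are rejected by the range check and become a no-op:
    ///
    /// ```
    /// use kernel::mm::virt::VmaRef;
    ///
    /// fn zap_whole_vma(vma: &VmaRef) {
    ///     // The half-open range [start, end) covers the entire area.
    ///     vma.zap_page_range_single(vma.start(), vma.end() - vma.start());
    /// }
    /// ```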
    #[inline]
    pub fn zap_page_range_single(&self, address: usize, size: usize) {
        let (end, did_overflow) = address.overflowing_add(size);
        if did_overflow || address < self.start() || self.end() < end {
            // TODO: call WARN_ONCE once Rust version of it is added
            return;
        }

        // SAFETY: By the type invariants, the caller has read access to this VMA, which is
        // sufficient for this method call. This method has no requirements on the vma flags. The
        // address range is checked to be within the vma.
        unsafe {
            bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut())
        };
    }

    /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise
    /// returns `None`.
    ///
    /// This can be used to access methods that require [`VM_MIXEDMAP`] to be set.
    ///
    /// [`VM_MIXEDMAP`]: flags::MIXEDMAP
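    ///
    /// # Examples
    ///
    /// A sketch of the intended pattern (hypothetical helper; assumes `addr` lies within the vma
    /// and that `page` has already been allocated):
    ///
    /// ```
    /// use kernel::mm::virt::VmaRef;
    /// use kernel::page::Page;
    /// use kernel::prelude::*;
    ///
    /// fn insert_if_mixedmap(vma: &VmaRef, addr: usize, page: &Page) -> Result {
    ///     match vma.as_mixedmap_vma() {
    ///         Some(mixed) => mixed.vm_insert_page(addr, page),
    ///         None => Err(EINVAL),
    ///     }
    /// }
    /// ```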
    #[inline]
    pub fn as_mixedmap_vma(&self) -> Option<&VmaMixedMap> {
        if self.flags() & flags::MIXEDMAP != 0 {
            // SAFETY: We just checked that `VM_MIXEDMAP` is set. All other requirements are
            // satisfied by the type invariants of `VmaRef`.
            Some(unsafe { VmaMixedMap::from_raw(self.as_ptr()) })
        } else {
            None
        }
    }
}

/// A wrapper for the kernel's `struct vm_area_struct` with read access and [`VM_MIXEDMAP`] set.
///
/// It represents an area of virtual memory.
///
/// This struct is identical to [`VmaRef`] except that it must only be used when the
/// [`VM_MIXEDMAP`] flag is set on the vma.
///
/// # Invariants
///
/// The caller must hold the mmap read lock or the vma read lock. The `VM_MIXEDMAP` flag must be
/// set.
///
/// [`VM_MIXEDMAP`]: flags::MIXEDMAP
#[repr(transparent)]
pub struct VmaMixedMap {
    vma: VmaRef,
}

// Make all `VmaRef` methods available on `VmaMixedMap`.
impl Deref for VmaMixedMap {
    type Target = VmaRef;

    #[inline]
    fn deref(&self) -> &VmaRef {
        &self.vma
    }
}

impl VmaMixedMap {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap read lock
    /// (or stronger) is held for at least the duration of 'a. The `VM_MIXEDMAP` flag must be set.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Maps a single page at the given address within the virtual memory area.
    ///
    /// This operation does not take ownership of the page.
    #[inline]
    pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result {
        // SAFETY: By the type invariant of `Self`, the caller has read access and has verified
        // that `VM_MIXEDMAP` is set. By the invariant on `Page`, the page has order 0.
        to_result(unsafe { bindings::vm_insert_page(self.as_ptr(), address, page.as_ptr()) })
    }
}

/// A configuration object for setting up a VMA in an `f_ops->mmap()` hook.
///
/// The `f_ops->mmap()` hook is called when a new VMA is being created, and the hook is able to
/// configure the VMA in various ways to fit the driver that owns it. Using `VmaNew` indicates that
/// you are allowed to perform operations on the VMA that can only be performed before the VMA is
/// fully initialized.
///
/// # Invariants
///
/// For the duration of 'a, the referenced vma must be undergoing initialization in an
/// `f_ops->mmap()` hook.
#[repr(transparent)]
pub struct VmaNew {
    vma: VmaRef,
}

// Make all `VmaRef` methods available on `VmaNew`.
impl Deref for VmaNew {
    type Target = VmaRef;

    #[inline]
    fn deref(&self) -> &VmaRef {
        &self.vma
    }
}

impl VmaNew {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is undergoing initial vma setup for the duration of 'a.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *mut bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Internal method for updating the vma flags.
    ///
    /// # Safety
    ///
    /// This must not be used to set the flags to an invalid value.
    #[inline]
    unsafe fn update_flags(&self, set: vm_flags_t, unset: vm_flags_t) {
        let mut flags = self.flags();
        flags |= set;
        flags &= !unset;

        // SAFETY: This is not a data race: the vma is undergoing initial setup, so it's not yet
        // shared. Additionally, `VmaNew` is `!Sync`, so it cannot be used to write in parallel.
        // The caller promises that this does not set the flags to an invalid value.
        unsafe { (*self.as_ptr()).__bindgen_anon_2.__vm_flags = flags };
    }

    /// Set the `VM_MIXEDMAP` flag on this vma.
    ///
    /// This enables the vma to contain both `struct page` and pure PFN pages. Returns a reference
    /// that can be used to call `vm_insert_page` on the vma.
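    ///
    /// # Examples
    ///
    /// A sketch (hypothetical mmap-hook code) of enabling page insertion on a new vma:
    ///
    /// ```
    /// use kernel::mm::virt::VmaNew;
    /// use kernel::page::Page;
    /// use kernel::prelude::*;
    ///
    /// fn map_first_page(vma: &VmaNew, page: &Page) -> Result {
    ///     // The returned `VmaMixedMap` is the only way to reach `vm_insert_page`.
    ///     let mixed = vma.set_mixedmap();
    ///     mixed.vm_insert_page(mixed.start(), page)
    /// }
    /// ```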
260
#[inline]
261
pub fn set_mixedmap(&self) -> &VmaMixedMap {
262
// SAFETY: We don't yet provide a way to set VM_PFNMAP, so this cannot put the flags in an
263
// invalid state.
264
unsafe { self.update_flags(flags::MIXEDMAP, 0) };
265
266
// SAFETY: We just set `VM_MIXEDMAP` on the vma.
267
unsafe { VmaMixedMap::from_raw(self.vma.as_ptr()) }
268
}
269
270
/// Set the `VM_IO` flag on this vma.
271
///
272
/// This is used for memory mapped IO and similar. The flag tells other parts of the kernel to
273
/// avoid looking at the pages. For memory mapped IO this is useful as accesses to the pages
274
/// could have side effects.
275
#[inline]
276
pub fn set_io(&self) {
277
// SAFETY: Setting the VM_IO flag is always okay.
278
unsafe { self.update_flags(flags::IO, 0) };
279
}
280
281
/// Set the `VM_DONTEXPAND` flag on this vma.
282
///
283
/// This prevents the vma from being expanded with `mremap()`.
284
#[inline]
285
pub fn set_dontexpand(&self) {
286
// SAFETY: Setting the VM_DONTEXPAND flag is always okay.
287
unsafe { self.update_flags(flags::DONTEXPAND, 0) };
288
}
289
290
/// Set the `VM_DONTCOPY` flag on this vma.
291
///
292
/// This prevents the vma from being copied on fork. This option is only permanent if `VM_IO`
293
/// is set.
294
#[inline]
295
pub fn set_dontcopy(&self) {
296
// SAFETY: Setting the VM_DONTCOPY flag is always okay.
297
unsafe { self.update_flags(flags::DONTCOPY, 0) };
298
}
299
300
/// Set the `VM_DONTDUMP` flag on this vma.
301
///
302
/// This prevents the vma from being included in core dumps. This option is only permanent if
303
/// `VM_IO` is set.
304
#[inline]
305
pub fn set_dontdump(&self) {
306
// SAFETY: Setting the VM_DONTDUMP flag is always okay.
307
unsafe { self.update_flags(flags::DONTDUMP, 0) };
308
}
309
310
/// Returns whether `VM_READ` is set.
311
///
312
/// This flag indicates whether userspace is mapping this vma as readable.
313
#[inline]
314
pub fn readable(&self) -> bool {
315
(self.flags() & flags::READ) != 0
316
}
317
318
/// Try to clear the `VM_MAYREAD` flag, failing if `VM_READ` is set.
319
///
320
/// This flag indicates whether userspace is allowed to make this vma readable with
321
/// `mprotect()`.
322
///
323
/// Note that this operation is irreversible. Once `VM_MAYREAD` has been cleared, it can never
324
/// be set again.
325
#[inline]
326
pub fn try_clear_mayread(&self) -> Result {
327
if self.readable() {
328
return Err(EINVAL);
329
}
330
// SAFETY: Clearing `VM_MAYREAD` is okay when `VM_READ` is not set.
331
unsafe { self.update_flags(0, flags::MAYREAD) };
332
Ok(())
333
}
334
335
/// Returns whether `VM_WRITE` is set.
336
///
337
/// This flag indicates whether userspace is mapping this vma as writable.
338
#[inline]
339
pub fn writable(&self) -> bool {
340
(self.flags() & flags::WRITE) != 0
341
}
342
343
/// Try to clear the `VM_MAYWRITE` flag, failing if `VM_WRITE` is set.
344
///
345
/// This flag indicates whether userspace is allowed to make this vma writable with
346
/// `mprotect()`.
347
///
348
/// Note that this operation is irreversible. Once `VM_MAYWRITE` has been cleared, it can never
349
/// be set again.
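    ///
    /// # Examples
    ///
    /// A sketch (hypothetical driver policy) of a device that only supports read-only mappings:
    /// writable requests are rejected, and userspace can never upgrade the mapping later:
    ///
    /// ```
    /// use kernel::mm::virt::VmaNew;
    /// use kernel::prelude::*;
    ///
    /// fn enforce_read_only(vma: &VmaNew) -> Result {
    ///     // Fails with EINVAL if the mapping was requested writable (`VM_WRITE` set).
    ///     vma.try_clear_maywrite()
    /// }
    /// ```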
    #[inline]
    pub fn try_clear_maywrite(&self) -> Result {
        if self.writable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYWRITE` is okay when `VM_WRITE` is not set.
        unsafe { self.update_flags(0, flags::MAYWRITE) };
        Ok(())
    }

    /// Returns whether `VM_EXEC` is set.
    ///
    /// This flag indicates whether userspace is mapping this vma as executable.
    #[inline]
    pub fn executable(&self) -> bool {
        (self.flags() & flags::EXEC) != 0
    }

    /// Try to clear the `VM_MAYEXEC` flag, failing if `VM_EXEC` is set.
    ///
    /// This flag indicates whether userspace is allowed to make this vma executable with
    /// `mprotect()`.
    ///
    /// Note that this operation is irreversible. Once `VM_MAYEXEC` has been cleared, it can never
    /// be set again.
    #[inline]
    pub fn try_clear_mayexec(&self) -> Result {
        if self.executable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYEXEC` is okay when `VM_EXEC` is not set.
        unsafe { self.update_flags(0, flags::MAYEXEC) };
        Ok(())
    }
}

/// The integer type used for vma flags.
#[doc(inline)]
pub use bindings::vm_flags_t;

/// All possible flags for [`VmaRef`].
pub mod flags {
    use super::vm_flags_t;
    use crate::bindings;

    /// No flags are set.
    pub const NONE: vm_flags_t = bindings::VM_NONE as vm_flags_t;

    /// Mapping allows reads.
    pub const READ: vm_flags_t = bindings::VM_READ as vm_flags_t;

    /// Mapping allows writes.
    pub const WRITE: vm_flags_t = bindings::VM_WRITE as vm_flags_t;

    /// Mapping allows execution.
    pub const EXEC: vm_flags_t = bindings::VM_EXEC as vm_flags_t;

    /// Mapping is shared.
    pub const SHARED: vm_flags_t = bindings::VM_SHARED as vm_flags_t;

    /// Mapping may be updated to allow reads.
    pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as vm_flags_t;

    /// Mapping may be updated to allow writes.
    pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as vm_flags_t;

    /// Mapping may be updated to allow execution.
    pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as vm_flags_t;

    /// Mapping may be updated to be shared.
    pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as vm_flags_t;

    /// Page-ranges managed without `struct page`, just pure PFN.
    pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as vm_flags_t;

    /// Memory mapped I/O or similar.
    pub const IO: vm_flags_t = bindings::VM_IO as vm_flags_t;

    /// Do not copy this vma on fork.
    pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as vm_flags_t;

    /// Cannot expand with `mremap()`.
    pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as vm_flags_t;

    /// Lock the pages covered when they are faulted in.
    pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as vm_flags_t;

    /// Is a VM accounted object.
    pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as vm_flags_t;

    /// Should the VM suppress accounting.
    pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as vm_flags_t;

    /// Huge TLB Page VM.
    pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as vm_flags_t;

    /// Synchronous page faults. (DAX-specific)
    pub const SYNC: vm_flags_t = bindings::VM_SYNC as vm_flags_t;

    /// Architecture-specific flag.
    pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as vm_flags_t;

    /// Wipe VMA contents in child on fork.
    pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as vm_flags_t;

    /// Do not include in the core dump.
    pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as vm_flags_t;

    /// Not a soft-dirty clean area.
    pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as vm_flags_t;

    /// Can contain `struct page` and pure PFN pages.
    pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as vm_flags_t;

    /// `MADV_HUGEPAGE` marked this vma.
    pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as vm_flags_t;

    /// `MADV_NOHUGEPAGE` marked this vma.
    pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as vm_flags_t;

    /// KSM may merge identical pages.
    pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as vm_flags_t;
}