// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2024 Google LLC.

//! Virtual memory.
//!
//! This module deals with managing a single VMA in the address space of a userspace process. Each
//! VMA corresponds to a region of memory that the userspace process can access, and the VMA lets
//! you control what happens when userspace reads or writes to that region of memory.
//!
//! The module has several different Rust types that all correspond to the C type called
//! `vm_area_struct`. The different structs represent what kind of access you have to the VMA, e.g.
//! [`VmaRef`] is used when you hold the mmap or vma read lock. Using the appropriate struct
//! ensures that you can't, for example, accidentally call a function that requires holding the
//! write lock when you only hold the read lock.

use crate::{
    bindings,
    error::{code::EINVAL, to_result, Result},
    mm::MmWithUser,
    page::Page,
    types::Opaque,
};

use core::ops::Deref;

/// A wrapper for the kernel's `struct vm_area_struct` with read access.
///
/// It represents an area of virtual memory.
///
/// # Invariants
///
/// The caller must hold the mmap read lock or the vma read lock.
#[repr(transparent)]
pub struct VmaRef {
    vma: Opaque<bindings::vm_area_struct>,
}

// Methods you can call when holding the mmap or vma read lock (or stronger). They must be usable
// no matter what the vma flags are.
impl VmaRef {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap or vma
    /// read lock (or stronger) is held for at least the duration of 'a.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Returns a raw pointer to this area.
    #[inline]
    pub fn as_ptr(&self) -> *mut bindings::vm_area_struct {
        self.vma.get()
    }

    /// Access the underlying `mm_struct`.
    #[inline]
    pub fn mm(&self) -> &MmWithUser {
        // SAFETY: By the type invariants, this `vm_area_struct` is valid and we hold the mmap/vma
        // read lock or stronger. This implies that the underlying mm has a non-zero value of
        // `mm_users`.
        unsafe { MmWithUser::from_raw((*self.as_ptr()).vm_mm) }
    }

    /// Returns the flags associated with the virtual memory area.
    ///
    /// The possible flags are a combination of the constants in [`flags`].
    #[inline]
    pub fn flags(&self) -> vm_flags_t {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_2.vm_flags }
    }

    /// Returns the (inclusive) start address of the virtual memory area.
    #[inline]
    pub fn start(&self) -> usize {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_start }
    }

    /// Returns the (exclusive) end address of the virtual memory area.
    #[inline]
    pub fn end(&self) -> usize {
        // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this
        // access is not a data race.
        unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_end }
    }

    /// Zap pages in the given page range.
    ///
    /// This clears page table mappings for the range at the leaf level, leaving all other page
    /// tables intact, and freeing any memory referenced by the VMA in this range. That is,
    /// anonymous memory is completely freed, file-backed memory has its reference counts on page
    /// cache folios dropped, and any dirty data will still be written back to disk as usual.
    ///
    /// It may seem odd that we clear at the leaf level; this is, however, a product of the page
    /// table structure used to map physical memory into a virtual address space: each virtual
    /// address actually consists of a sequence of array indices into page tables, which form a
    /// hierarchical page table structure.
    ///
    /// As a result, each page table level maps a multiple of the range mapped by the level below
    /// it, and thus spans ever-increasing ranges of pages. At the leaf, or PTE, level, we map the
    /// actual physical memory.
    ///
    /// It is here that a zap operates, as it is the only place we can be certain of clearing
    /// without impacting any other virtual mappings. It is an implementation detail as to whether
    /// the kernel goes further in freeing unused page tables, but for the purposes of this
    /// operation we may only assume that the leaf level is cleared.
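    ///
    /// # Examples
    ///
    /// A minimal, illustrative sketch (hypothetical driver code, not taken from an in-tree
    /// user): zap the entire range of a VMA, assuming `vma` was obtained while holding the
    /// mmap or vma read lock.
    ///
    /// ```
    /// use kernel::mm::virt::VmaRef;
    ///
    /// fn zap_whole_vma(vma: &VmaRef) {
    ///     // `start` is inclusive and `end` is exclusive, so this covers every page in the
    ///     // VMA and passes the bounds check inside `zap_page_range_single`.
    ///     vma.zap_page_range_single(vma.start(), vma.end() - vma.start());
    /// }
    /// ```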
    #[inline]
    pub fn zap_page_range_single(&self, address: usize, size: usize) {
        let (end, did_overflow) = address.overflowing_add(size);
        if did_overflow || address < self.start() || self.end() < end {
            // TODO: call WARN_ONCE once Rust version of it is added
            return;
        }

        // SAFETY: By the type invariants, the caller has read access to this VMA, which is
        // sufficient for this method call. This method has no requirements on the vma flags. The
        // address range is checked to be within the vma.
        unsafe {
            bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut())
        };
    }

    /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise
    /// returns `None`.
    ///
    /// This can be used to access methods that require [`VM_MIXEDMAP`] to be set.
    ///
    /// [`VM_MIXEDMAP`]: flags::MIXEDMAP
    #[inline]
    pub fn as_mixedmap_vma(&self) -> Option<&VmaMixedMap> {
        if self.flags() & flags::MIXEDMAP != 0 {
            // SAFETY: We just checked that `VM_MIXEDMAP` is set. All other requirements are
            // satisfied by the type invariants of `VmaRef`.
            Some(unsafe { VmaMixedMap::from_raw(self.as_ptr()) })
        } else {
            None
        }
    }
}

/// A wrapper for the kernel's `struct vm_area_struct` with read access and [`VM_MIXEDMAP`] set.
///
/// It represents an area of virtual memory.
///
/// This struct is identical to [`VmaRef`] except that it must only be used when the
/// [`VM_MIXEDMAP`] flag is set on the vma.
///
/// # Invariants
///
/// The caller must hold the mmap read lock or the vma read lock. The `VM_MIXEDMAP` flag must be
/// set.
///
/// [`VM_MIXEDMAP`]: flags::MIXEDMAP
#[repr(transparent)]
pub struct VmaMixedMap {
    vma: VmaRef,
}

// Make all `VmaRef` methods available on `VmaMixedMap`.
impl Deref for VmaMixedMap {
    type Target = VmaRef;

    #[inline]
    fn deref(&self) -> &VmaRef {
        &self.vma
    }
}

impl VmaMixedMap {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap read lock
    /// (or stronger) is held for at least the duration of 'a. The `VM_MIXEDMAP` flag must be set.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Maps a single page at the given address within the virtual memory area.
    ///
    /// This operation does not take ownership of the page.
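    ///
    /// # Examples
    ///
    /// An illustrative sketch (hypothetical driver code): insert a driver-owned page at the
    /// first user address of a VMA that had `VM_MIXEDMAP` set during `f_ops->mmap()`.
    ///
    /// ```
    /// use kernel::mm::virt::VmaMixedMap;
    /// use kernel::page::Page;
    /// use kernel::prelude::*;
    ///
    /// fn map_first_page(vma: &VmaMixedMap, page: &Page) -> Result {
    ///     // The VMA takes its own reference to the page; the caller keeps ownership.
    ///     vma.vm_insert_page(vma.start(), page)
    /// }
    /// ```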
    #[inline]
    pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result {
        // SAFETY: By the type invariants of `Self`, the caller has read access and has verified
        // that `VM_MIXEDMAP` is set. By the invariant on `Page`, the page has order 0.
        to_result(unsafe { bindings::vm_insert_page(self.as_ptr(), address, page.as_ptr()) })
    }
}

/// A configuration object for setting up a VMA in an `f_ops->mmap()` hook.
///
/// The `f_ops->mmap()` hook is called when a new VMA is being created, and the hook is able to
/// configure the VMA in various ways to fit the driver that owns it. Using `VmaNew` indicates that
/// you are allowed to perform operations on the VMA that can only be performed before the VMA is
/// fully initialized.
///
/// # Invariants
///
/// For the duration of 'a, the referenced vma must be undergoing initialization in an
/// `f_ops->mmap()` hook.
#[repr(transparent)]
pub struct VmaNew {
    vma: VmaRef,
}

// Make all `VmaRef` methods available on `VmaNew`.
impl Deref for VmaNew {
    type Target = VmaRef;

    #[inline]
    fn deref(&self) -> &VmaRef {
        &self.vma
    }
}

impl VmaNew {
    /// Access a virtual memory area given a raw pointer.
    ///
    /// # Safety
    ///
    /// Callers must ensure that `vma` is undergoing initial vma setup for the duration of 'a.
    #[inline]
    pub unsafe fn from_raw<'a>(vma: *mut bindings::vm_area_struct) -> &'a Self {
        // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
        unsafe { &*vma.cast() }
    }

    /// Internal method for updating the vma flags.
    ///
    /// # Safety
    ///
    /// This must not be used to set the flags to an invalid value.
    #[inline]
    unsafe fn update_flags(&self, set: vm_flags_t, unset: vm_flags_t) {
        let mut flags = self.flags();
        flags |= set;
        flags &= !unset;

        // SAFETY: This is not a data race: the vma is undergoing initial setup, so it's not yet
        // shared. Additionally, `VmaNew` is `!Sync`, so it cannot be used to write in parallel.
        // The caller promises that this does not set the flags to an invalid value.
        unsafe { (*self.as_ptr()).__bindgen_anon_2.__vm_flags = flags };
    }

    /// Set the `VM_MIXEDMAP` flag on this vma.
    ///
    /// This enables the vma to contain both `struct page` and pure PFN pages. Returns a reference
    /// that can be used to call `vm_insert_page` on the vma.
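    ///
    /// # Examples
    ///
    /// A hedged sketch of an `f_ops->mmap()` hook body (hypothetical driver code): enable
    /// `VM_MIXEDMAP` and immediately use the returned handle.
    ///
    /// ```
    /// use kernel::mm::virt::VmaNew;
    /// use kernel::page::Page;
    /// use kernel::prelude::*;
    ///
    /// fn setup_mapping(vma: &VmaNew, page: &Page) -> Result {
    ///     // Upgrading the VMA to `VmaMixedMap` is what makes `vm_insert_page` available.
    ///     let mixed = vma.set_mixedmap();
    ///     mixed.vm_insert_page(mixed.start(), page)
    /// }
    /// ```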
    #[inline]
    pub fn set_mixedmap(&self) -> &VmaMixedMap {
        // SAFETY: We don't yet provide a way to set VM_PFNMAP, so this cannot put the flags in an
        // invalid state.
        unsafe { self.update_flags(flags::MIXEDMAP, 0) };

        // SAFETY: We just set `VM_MIXEDMAP` on the vma.
        unsafe { VmaMixedMap::from_raw(self.vma.as_ptr()) }
    }

    /// Set the `VM_IO` flag on this vma.
    ///
    /// This is used for memory mapped IO and similar. The flag tells other parts of the kernel to
    /// avoid looking at the pages. For memory mapped IO this is useful, as accesses to the pages
    /// could have side effects.
    #[inline]
    pub fn set_io(&self) {
        // SAFETY: Setting the VM_IO flag is always okay.
        unsafe { self.update_flags(flags::IO, 0) };
    }

    /// Set the `VM_DONTEXPAND` flag on this vma.
    ///
    /// This prevents the vma from being expanded with `mremap()`.
    #[inline]
    pub fn set_dontexpand(&self) {
        // SAFETY: Setting the VM_DONTEXPAND flag is always okay.
        unsafe { self.update_flags(flags::DONTEXPAND, 0) };
    }

    /// Set the `VM_DONTCOPY` flag on this vma.
    ///
    /// This prevents the vma from being copied on fork. This option is only permanent if `VM_IO`
    /// is set.
    #[inline]
    pub fn set_dontcopy(&self) {
        // SAFETY: Setting the VM_DONTCOPY flag is always okay.
        unsafe { self.update_flags(flags::DONTCOPY, 0) };
    }

    /// Set the `VM_DONTDUMP` flag on this vma.
    ///
    /// This prevents the vma from being included in core dumps. This option is only permanent if
    /// `VM_IO` is set.
    #[inline]
    pub fn set_dontdump(&self) {
        // SAFETY: Setting the VM_DONTDUMP flag is always okay.
        unsafe { self.update_flags(flags::DONTDUMP, 0) };
    }

    /// Returns whether `VM_READ` is set.
    ///
    /// This flag indicates whether userspace is mapping this vma as readable.
    #[inline]
    pub fn readable(&self) -> bool {
        (self.flags() & flags::READ) != 0
    }

    /// Try to clear the `VM_MAYREAD` flag, failing if `VM_READ` is set.
    ///
    /// This flag indicates whether userspace is allowed to make this vma readable with
    /// `mprotect()`.
    ///
    /// Note that this operation is irreversible. Once `VM_MAYREAD` has been cleared, it can never
    /// be set again.
    #[inline]
    pub fn try_clear_mayread(&self) -> Result {
        if self.readable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYREAD` is okay when `VM_READ` is not set.
        unsafe { self.update_flags(0, flags::MAYREAD) };
        Ok(())
    }

    /// Returns whether `VM_WRITE` is set.
    ///
    /// This flag indicates whether userspace is mapping this vma as writable.
    #[inline]
    pub fn writable(&self) -> bool {
        (self.flags() & flags::WRITE) != 0
    }

    /// Try to clear the `VM_MAYWRITE` flag, failing if `VM_WRITE` is set.
    ///
    /// This flag indicates whether userspace is allowed to make this vma writable with
    /// `mprotect()`.
    ///
    /// Note that this operation is irreversible. Once `VM_MAYWRITE` has been cleared, it can never
    /// be set again.
    #[inline]
    pub fn try_clear_maywrite(&self) -> Result {
        if self.writable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYWRITE` is okay when `VM_WRITE` is not set.
        unsafe { self.update_flags(0, flags::MAYWRITE) };
        Ok(())
    }

    /// Returns whether `VM_EXEC` is set.
    ///
    /// This flag indicates whether userspace is mapping this vma as executable.
    #[inline]
    pub fn executable(&self) -> bool {
        (self.flags() & flags::EXEC) != 0
    }

    /// Try to clear the `VM_MAYEXEC` flag, failing if `VM_EXEC` is set.
    ///
    /// This flag indicates whether userspace is allowed to make this vma executable with
    /// `mprotect()`.
    ///
    /// Note that this operation is irreversible. Once `VM_MAYEXEC` has been cleared, it can never
    /// be set again.
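    ///
    /// # Examples
    ///
    /// An illustrative sketch (hypothetical `f_ops->mmap()` logic): refuse executable mappings
    /// and prevent userspace from adding execute permission later via `mprotect()`.
    ///
    /// ```
    /// use kernel::mm::virt::VmaNew;
    /// use kernel::prelude::*;
    ///
    /// fn forbid_exec(vma: &VmaNew) -> Result {
    ///     // Fails with `EINVAL` if the mapping was requested as executable; on success,
    ///     // clearing `VM_MAYEXEC` makes the restriction permanent.
    ///     vma.try_clear_mayexec()
    /// }
    /// ```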
    #[inline]
    pub fn try_clear_mayexec(&self) -> Result {
        if self.executable() {
            return Err(EINVAL);
        }
        // SAFETY: Clearing `VM_MAYEXEC` is okay when `VM_EXEC` is not set.
        unsafe { self.update_flags(0, flags::MAYEXEC) };
        Ok(())
    }
}

/// The integer type used for vma flags.
#[doc(inline)]
pub use bindings::vm_flags_t;

/// All possible flags for [`VmaRef`].
pub mod flags {
    use super::vm_flags_t;
    use crate::bindings;

    /// No flags are set.
    pub const NONE: vm_flags_t = bindings::VM_NONE as vm_flags_t;

    /// Mapping allows reads.
    pub const READ: vm_flags_t = bindings::VM_READ as vm_flags_t;

    /// Mapping allows writes.
    pub const WRITE: vm_flags_t = bindings::VM_WRITE as vm_flags_t;

    /// Mapping allows execution.
    pub const EXEC: vm_flags_t = bindings::VM_EXEC as vm_flags_t;

    /// Mapping is shared.
    pub const SHARED: vm_flags_t = bindings::VM_SHARED as vm_flags_t;

    /// Mapping may be updated to allow reads.
    pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as vm_flags_t;

    /// Mapping may be updated to allow writes.
    pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as vm_flags_t;

    /// Mapping may be updated to allow execution.
    pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as vm_flags_t;

    /// Mapping may be updated to be shared.
    pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as vm_flags_t;

    /// Page-ranges managed without `struct page`, just pure PFN.
    pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as vm_flags_t;

    /// Memory mapped I/O or similar.
    pub const IO: vm_flags_t = bindings::VM_IO as vm_flags_t;

    /// Do not copy this vma on fork.
    pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as vm_flags_t;

    /// Cannot expand with mremap().
    pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as vm_flags_t;

    /// Lock the pages covered when they are faulted in.
    pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as vm_flags_t;

    /// Is a VM accounted object.
    pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as vm_flags_t;

    /// Should the VM suppress accounting.
    pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as vm_flags_t;

    /// Huge TLB Page VM.
    pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as vm_flags_t;

    /// Synchronous page faults. (DAX-specific)
    pub const SYNC: vm_flags_t = bindings::VM_SYNC as vm_flags_t;

    /// Architecture-specific flag.
    pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as vm_flags_t;

    /// Wipe VMA contents in child on fork.
    pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as vm_flags_t;

    /// Do not include in the core dump.
    pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as vm_flags_t;

    /// Not soft dirty clean area.
    pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as vm_flags_t;

    /// Can contain `struct page` and pure PFN pages.
    pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as vm_flags_t;

    /// MADV_HUGEPAGE marked this vma.
    pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as vm_flags_t;

    /// MADV_NOHUGEPAGE marked this vma.
    pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as vm_flags_t;

    /// KSM may merge identical pages.
    pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as vm_flags_t;
}
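// The flag constants above are plain integers and combine with the usual bitwise operators.
// A hedged usage sketch (hypothetical check, assuming `vma: &VmaRef` is in scope with the
// mmap or vma read lock held):
//
//     let shared_writable = flags::SHARED | flags::WRITE;
//     if vma.flags() & shared_writable == shared_writable {
//         // The mapping is both shared and writable.
//     }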