memmap2/
advice.rs

1/// Values supported by [`Mmap::advise`][crate::Mmap::advise] and [`MmapMut::advise`][crate::MmapMut::advise] functions.
2///
3/// See [madvise()](https://man7.org/linux/man-pages/man2/madvise.2.html) map page.
4#[repr(i32)]
5#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
6pub enum Advice {
7    /// **MADV_NORMAL**
8    ///
9    /// No special treatment.  This is the default.
10    Normal = libc::MADV_NORMAL,
11
12    /// **MADV_RANDOM**
13    ///
14    /// Expect page references in random order.  (Hence, read
15    /// ahead may be less useful than normally.)
16    Random = libc::MADV_RANDOM,
17
18    /// **MADV_SEQUENTIAL**
19    ///
20    /// Expect page references in sequential order.  (Hence, pages
21    /// in the given range can be aggressively read ahead, and may
22    /// be freed soon after they are accessed.)
23    Sequential = libc::MADV_SEQUENTIAL,
24
25    /// **MADV_WILLNEED**
26    ///
27    /// Expect access in the near future.  (Hence, it might be a
28    /// good idea to read some pages ahead.)
29    WillNeed = libc::MADV_WILLNEED,
30
31    /// **MADV_DONTFORK** - Linux only (since Linux 2.6.16)
32    ///
33    /// Do not make the pages in this range available to the child
34    /// after a fork(2).  This is useful to prevent copy-on-write
35    /// semantics from changing the physical location of a page if
36    /// the parent writes to it after a fork(2).  (Such page
37    /// relocations cause problems for hardware that DMAs into the
38    /// page.)
39    #[cfg(target_os = "linux")]
40    DontFork = libc::MADV_DONTFORK,
41
42    /// **MADV_DOFORK** - Linux only (since Linux 2.6.16)
43    ///
44    /// Undo the effect of MADV_DONTFORK, restoring the default
45    /// behavior, whereby a mapping is inherited across fork(2).
46    #[cfg(target_os = "linux")]
47    DoFork = libc::MADV_DOFORK,
48
49    /// **MADV_MERGEABLE** - Linux only (since Linux 2.6.32)
50    ///
51    /// Enable Kernel Samepage Merging (KSM) for the pages in the
52    /// range specified by addr and length.  The kernel regularly
53    /// scans those areas of user memory that have been marked as
54    /// mergeable, looking for pages with identical content.
55    /// These are replaced by a single write-protected page (which
56    /// is automatically copied if a process later wants to update
57    /// the content of the page).  KSM merges only private
58    /// anonymous pages (see mmap(2)).
59    ///
60    /// The KSM feature is intended for applications that generate
61    /// many instances of the same data (e.g., virtualization
62    /// systems such as KVM).  It can consume a lot of processing
63    /// power; use with care.  See the Linux kernel source file
64    /// Documentation/admin-guide/mm/ksm.rst for more details.
65    ///
66    /// The MADV_MERGEABLE and MADV_UNMERGEABLE operations are
67    /// available only if the kernel was configured with
68    /// CONFIG_KSM.
69    #[cfg(target_os = "linux")]
70    Mergeable = libc::MADV_MERGEABLE,
71
72    /// **MADV_UNMERGEABLE** - Linux only (since Linux 2.6.32)
73    ///
74    /// Undo the effect of an earlier MADV_MERGEABLE operation on
75    /// the specified address range; KSM unmerges whatever pages
76    /// it had merged in the address range specified by addr and
77    /// length.
78    #[cfg(target_os = "linux")]
79    Unmergeable = libc::MADV_UNMERGEABLE,
80
81    /// **MADV_HUGEPAGE** - Linux only (since Linux 2.6.38)
82    ///
83    /// Enable Transparent Huge Pages (THP) for pages in the range
84    /// specified by addr and length.  Currently, Transparent Huge
85    /// Pages work only with private anonymous pages (see
86    /// mmap(2)).  The kernel will regularly scan the areas marked
87    /// as huge page candidates to replace them with huge pages.
88    /// The kernel will also allocate huge pages directly when the
89    /// region is naturally aligned to the huge page size (see
90    /// posix_memalign(2)).
91    ///
92    /// This feature is primarily aimed at applications that use
93    /// large mappings of data and access large regions of that
94    /// memory at a time (e.g., virtualization systems such as
95    /// QEMU).  It can very easily waste memory (e.g., a 2 MB
96    /// mapping that only ever accesses 1 byte will result in 2 MB
97    /// of wired memory instead of one 4 KB page).  See the Linux
98    /// kernel source file
99    /// Documentation/admin-guide/mm/transhuge.rst for more
100    /// details.
101    ///
102    /// Most common kernels configurations provide MADV_HUGEPAGE-
103    /// style behavior by default, and thus MADV_HUGEPAGE is
104    /// normally not necessary.  It is mostly intended for
105    /// embedded systems, where MADV_HUGEPAGE-style behavior may
106    /// not be enabled by default in the kernel.  On such systems,
107    /// this flag can be used in order to selectively enable THP.
108    /// Whenever MADV_HUGEPAGE is used, it should always be in
109    /// regions of memory with an access pattern that the
110    /// developer knows in advance won't risk to increase the
111    /// memory footprint of the application when transparent
112    /// hugepages are enabled.
113    ///
114    /// The MADV_HUGEPAGE and MADV_NOHUGEPAGE operations are
115    /// available only if the kernel was configured with
116    /// CONFIG_TRANSPARENT_HUGEPAGE.
117    #[cfg(target_os = "linux")]
118    HugePage = libc::MADV_HUGEPAGE,
119
120    /// **MADV_NOHUGEPAGE** - Linux only (since Linux 2.6.38)
121    ///
122    /// Ensures that memory in the address range specified by addr
123    /// and length will not be backed by transparent hugepages.
124    #[cfg(target_os = "linux")]
125    NoHugePage = libc::MADV_NOHUGEPAGE,
126
127    /// **MADV_DONTDUMP** - Linux only (since Linux 3.4)
128    ///
129    /// Exclude from a core dump those pages in the range
130    /// specified by addr and length.  This is useful in
131    /// applications that have large areas of memory that are
132    /// known not to be useful in a core dump.  The effect of
133    /// **MADV_DONTDUMP** takes precedence over the bit mask that is
134    /// set via the `/proc/[pid]/coredump_filter` file (see
135    /// core(5)).
136    #[cfg(target_os = "linux")]
137    DontDump = libc::MADV_DONTDUMP,
138
139    /// **MADV_DODUMP** - Linux only (since Linux 3.4)
140    ///
141    /// Undo the effect of an earlier MADV_DONTDUMP.
142    #[cfg(target_os = "linux")]
143    DoDump = libc::MADV_DODUMP,
144
145    /// **MADV_HWPOISON** - Linux only (since Linux 2.6.32)
146    ///
147    /// Poison the pages in the range specified by addr and length
148    /// and handle subsequent references to those pages like a
149    /// hardware memory corruption.  This operation is available
150    /// only for privileged (CAP_SYS_ADMIN) processes.  This
151    /// operation may result in the calling process receiving a
152    /// SIGBUS and the page being unmapped.
153    ///
154    /// This feature is intended for testing of memory error-
155    /// handling code; it is available only if the kernel was
156    /// configured with CONFIG_MEMORY_FAILURE.
157    #[cfg(target_os = "linux")]
158    HwPoison = libc::MADV_HWPOISON,
159
160    /// **MADV_POPULATE_READ** - Linux only (since Linux 5.14)
161    ///
162    /// Populate  (prefault)  page  tables readable, faulting in all
163    /// pages in the range just as  if  manually  reading  from  each
164    /// page; however, avoid the actual memory access that would have
165    /// been performed after handling the fault.
166    ///
167    /// In contrast to MAP_POPULATE, MADV_POPULATE_READ does not hide
168    /// errors,  can  be  applied to (parts of) existing mappings and
169    /// will always populate (prefault) page  tables  readable.   One
170    /// example  use  case is prefaulting a file mapping, reading all
171    /// file content from disk; however, pages won't be  dirtied  and
172    /// consequently  won't  have  to  be  written  back to disk when
173    /// evicting the pages from memory.
174    ///
175    /// Depending on the underlying mapping, map the shared zeropage,
176    /// preallocate  memory  or  read the underlying file; files with
177    /// holes might or might not preallocate blocks.   If  populating
178    /// fails, a SIGBUS signal is not generated; instead, an error is
179    /// returned.
180    ///
181    /// If MADV_POPULATE_READ succeeds, all  page  tables  have  been
182    /// populated  (prefaulted) readable once.  If MADV_POPULATE_READ
183    /// fails, some page tables might have been populated.
184    ///
185    /// MADV_POPULATE_READ cannot be applied to mappings without read
186    /// permissions  and  special  mappings,  for  example,  mappings
187    /// marked with kernel-internal flags such as VM_PFNMAP or VM_IO,
188    /// or secret memory regions created using memfd_secret(2).
189    ///
190    /// Note  that with MADV_POPULATE_READ, the process can be killed
191    /// at any moment when the system runs out of memory.
192    #[cfg(target_os = "linux")]
193    PopulateRead = libc::MADV_POPULATE_READ,
194
195    /// **MADV_POPULATE_WRITE** - Linux only (since Linux 5.14)
196    ///
197    /// Populate (prefault) page tables  writable,  faulting  in  all
198    /// pages  in  the range just as if manually writing to each each
199    /// page; however, avoid the actual memory access that would have
200    /// been performed after handling the fault.
201    ///
202    /// In  contrast  to  MAP_POPULATE,  MADV_POPULATE_WRITE does not
203    /// hide errors, can be applied to (parts of)  existing  mappings
204    /// and  will  always  populate  (prefault) page tables writable.
205    /// One example use case is preallocating  memory,  breaking  any
206    /// CoW (Copy on Write).
207    ///
208    /// Depending  on  the  underlying mapping, preallocate memory or
209    /// read the underlying file; files with holes  will  preallocate
210    /// blocks.   If  populating fails, a SIGBUS signal is not gener‐
211    /// ated; instead, an error is returned.
212    ///
213    /// If MADV_POPULATE_WRITE succeeds, all page  tables  have  been
214    /// populated (prefaulted) writable once.  If MADV_POPULATE_WRITE
215    /// fails, some page tables might have been populated.
216    ///
217    /// MADV_POPULATE_WRITE cannot be  applied  to  mappings  without
218    /// write permissions and special mappings, for example, mappings
219    /// marked with kernel-internal flags such as VM_PFNMAP or VM_IO,
220    /// or secret memory regions created using memfd_secret(2).
221    ///
222    /// Note that with MADV_POPULATE_WRITE, the process can be killed
223    /// at any moment when the system runs out of memory.
224    #[cfg(target_os = "linux")]
225    PopulateWrite = libc::MADV_POPULATE_WRITE,
226
227    /// **MADV_ZERO_WIRED_PAGES** - Darwin only
228    ///
229    /// Indicates that the application would like the wired pages in this address range to be
230    /// zeroed out if the address range is deallocated without first unwiring the pages (i.e.
231    /// a munmap(2) without a preceding munlock(2) or the application quits).  This is used
232    /// with madvise() system call.
233    #[cfg(any(target_os = "macos", target_os = "ios"))]
234    ZeroWiredPages = libc::MADV_ZERO_WIRED_PAGES,
235}
236
237/// Values supported by [`Mmap::unsafe_advise`][crate::Mmap::unsafe_advise] and [`MmapMut::unsafe_advise`][crate::MmapMut::unsafe_advise] functions.
238///
239/// These flags can be passed to the [madvise (2)][man_page] system call
240/// and effects on the mapped pages which are conceptually writes,
241/// i.e. the change the observable contents of these pages which
242/// implies undefined behaviour if the mapping is still borrowed.
243///
244/// Hence, these potentially unsafe flags must be used with the unsafe
245/// methods and the programmer has to justify that the code
246/// does not keep any borrows of the mapping active while the mapped pages
247/// are updated by the kernel's memory management subsystem.
248///
249/// [man_page]: https://man7.org/linux/man-pages/man2/madvise.2.html
250#[repr(i32)]
251#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
252pub enum UncheckedAdvice {
253    /// **MADV_DONTNEED**
254    ///
255    /// Do not expect access in the near future.  (For the time
256    /// being, the application is finished with the given range,
257    /// so the kernel can free resources associated with it.)
258    ///
259    /// After a successful MADV_DONTNEED operation, the semantics
260    /// of memory access in the specified region are changed:
261    /// subsequent accesses of pages in the range will succeed,
262    /// but will result in either repopulating the memory contents
263    /// from the up-to-date contents of the underlying mapped file
264    /// (for shared file mappings, shared anonymous mappings, and
265    /// shmem-based techniques such as System V shared memory
266    /// segments) or zero-fill-on-demand pages for anonymous
267    /// private mappings.
268    ///
269    /// Note that, when applied to shared mappings, MADV_DONTNEED
270    /// might not lead to immediate freeing of the pages in the
271    /// range.  The kernel is free to delay freeing the pages
272    /// until an appropriate moment.  The resident set size (RSS)
273    /// of the calling process will be immediately reduced
274    /// however.
275    ///
276    /// **MADV_DONTNEED** cannot be applied to locked pages, Huge TLB
277    /// pages, or VM_PFNMAP pages.  (Pages marked with the kernel-
278    /// internal VM_PFNMAP flag are special memory areas that are
279    /// not managed by the virtual memory subsystem.  Such pages
280    /// are typically created by device drivers that map the pages
281    /// into user space.)
282    ///
283    /// # Safety
284    ///
285    /// Using the returned value with conceptually write to the
286    /// mapped pages, i.e. borrowing the mapping when the pages
287    /// are freed results in undefined behaviour.
288    DontNeed = libc::MADV_DONTNEED,
289
290    //
291    // The rest are Linux-specific
292    //
293    /// **MADV_FREE** - Linux (since Linux 4.5) and Darwin
294    ///
295    /// The application no longer requires the pages in the range
296    /// specified by addr and len.  The kernel can thus free these
297    /// pages, but the freeing could be delayed until memory
298    /// pressure occurs.  For each of the pages that has been
299    /// marked to be freed but has not yet been freed, the free
300    /// operation will be canceled if the caller writes into the
301    /// page.  After a successful MADV_FREE operation, any stale
302    /// data (i.e., dirty, unwritten pages) will be lost when the
303    /// kernel frees the pages.  However, subsequent writes to
304    /// pages in the range will succeed and then kernel cannot
305    /// free those dirtied pages, so that the caller can always
306    /// see just written data.  If there is no subsequent write,
307    /// the kernel can free the pages at any time.  Once pages in
308    /// the range have been freed, the caller will see zero-fill-
309    /// on-demand pages upon subsequent page references.
310    ///
311    /// The MADV_FREE operation can be applied only to private
312    /// anonymous pages (see mmap(2)).  In Linux before version
313    /// 4.12, when freeing pages on a swapless system, the pages
314    /// in the given range are freed instantly, regardless of
315    /// memory pressure.
316    ///
317    /// # Safety
318    ///
319    /// Using the returned value with conceptually write to the
320    /// mapped pages, i.e. borrowing the mapping while the pages
321    /// are still being freed results in undefined behaviour.
322    #[cfg(any(target_os = "linux", target_os = "macos", target_os = "ios"))]
323    Free = libc::MADV_FREE,
324
325    /// **MADV_REMOVE** - Linux only (since Linux 2.6.16)
326    ///
327    /// Free up a given range of pages and its associated backing
328    /// store.  This is equivalent to punching a hole in the
329    /// corresponding byte range of the backing store (see
330    /// fallocate(2)).  Subsequent accesses in the specified
331    /// address range will see bytes containing zero.
332    ///
333    /// The specified address range must be mapped shared and
334    /// writable.  This flag cannot be applied to locked pages,
335    /// Huge TLB pages, or VM_PFNMAP pages.
336    ///
337    /// In the initial implementation, only tmpfs(5) was supported
338    /// **MADV_REMOVE**; but since Linux 3.5, any filesystem which
339    /// supports the fallocate(2) FALLOC_FL_PUNCH_HOLE mode also
340    /// supports MADV_REMOVE.  Hugetlbfs fails with the error
341    /// EINVAL and other filesystems fail with the error
342    /// EOPNOTSUPP.
343    ///
344    /// # Safety
345    ///
346    /// Using the returned value with conceptually write to the
347    /// mapped pages, i.e. borrowing the mapping when the pages
348    /// are freed results in undefined behaviour.
349    #[cfg(target_os = "linux")]
350    Remove = libc::MADV_REMOVE,
351
352    /// **MADV_FREE_REUSABLE** - Darwin only
353    ///
354    /// Behaves like **MADV_FREE**, but the freed pages are accounted for in the RSS of the process.
355    ///
356    /// # Safety
357    ///
358    /// Using the returned value with conceptually write to the
359    /// mapped pages, i.e. borrowing the mapping while the pages
360    /// are still being freed results in undefined behaviour.
361    #[cfg(any(target_os = "macos", target_os = "ios"))]
362    FreeReusable = libc::MADV_FREE_REUSABLE,
363
364    /// **MADV_FREE_REUSE** - Darwin only
365    ///
366    /// Marks a memory region previously freed by **MADV_FREE_REUSABLE** as non-reusable, accounts
367    /// for the pages in the RSS of the process. Pages that have been freed will be replaced by
368    /// zero-filled pages on demand, other pages will be left as is.
369    ///
370    /// # Safety
371    ///
372    /// Using the returned value with conceptually write to the
373    /// mapped pages, i.e. borrowing the mapping while the pages
374    /// are still being freed results in undefined behaviour.
375    #[cfg(any(target_os = "macos", target_os = "ios"))]
376    FreeReuse = libc::MADV_FREE_REUSE,
377}
378
379// Future expansion:
380// MADV_SOFT_OFFLINE  (since Linux 2.6.33)
381// MADV_WIPEONFORK  (since Linux 4.14)
382// MADV_KEEPONFORK  (since Linux 4.14)
383// MADV_COLD  (since Linux 5.4)
384// MADV_PAGEOUT  (since Linux 5.4)
385
386#[cfg(target_os = "linux")]
387impl Advice {
388    /// Performs a runtime check if this advice is supported by the kernel.
389    /// Only supported on Linux. See the [`madvise(2)`] man page.
390    ///
391    /// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html#VERSIONS
392    pub fn is_supported(self) -> bool {
393        (unsafe { libc::madvise(std::ptr::null_mut(), 0, self as libc::c_int) }) == 0
394    }
395}
396
397#[cfg(target_os = "linux")]
398impl UncheckedAdvice {
399    /// Performs a runtime check if this advice is supported by the kernel.
400    /// Only supported on Linux. See the [`madvise(2)`] man page.
401    ///
402    /// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html#VERSIONS
403    pub fn is_supported(self) -> bool {
404        (unsafe { libc::madvise(std::ptr::null_mut(), 0, self as libc::c_int) }) == 0
405    }
406}
407
// `is_supported` only exists on Linux, so the whole module shares that gate.
#[cfg(all(test, target_os = "linux"))]
mod tests {
    use super::{Advice, UncheckedAdvice};

    /// The advice values that carry no platform gate must be reported as
    /// supported by the kernel the tests run on.
    #[test]
    fn test_is_supported() {
        assert!(Advice::Normal.is_supported());
        assert!(Advice::Random.is_supported());
        assert!(Advice::Sequential.is_supported());
        assert!(Advice::WillNeed.is_supported());

        assert!(UncheckedAdvice::DontNeed.is_supported());
    }
}