wright/source_tracking/immutable_string.rs
1//! Structure and implementation relating to the representation of source files (as immutable strings) throughout
2//! the Wright compiler and tooling.
3
4use std::{str::CharIndices, sync::Arc};
5
6#[cfg(feature = "file_memmap")]
7use fs4::fs_std::FileExt;
8
9#[cfg(feature = "file_memmap")]
10use memmap2::Mmap;
11
12#[cfg(feature = "file_memmap")]
13use std::{fs::File, io};
14
/// An immutable string that either
/// - references a source string in memory using a `'static` reference,
/// - owns a source string in memory, or
/// - owns a locked and memory mapped file from the disk (only available with the `file_memmap` feature).
///
/// The representation is stored behind an [Arc], so cloning an [ImmutableString] clones the [Arc] rather
/// than copying the underlying string data.
#[derive(Debug, Clone)]
pub struct ImmutableString {
    /// The shared internal enum representation. It is kept private to avoid exposing an API that would let a
    /// user construct an [ImmutableStringInner] without satisfying certain invariants.
    inner: Arc<ImmutableStringInner>,
}
27
28impl ImmutableString {
29 /// Wrap the inner representation in this type.
30 #[inline]
31 fn from_inner(inner: ImmutableStringInner) -> Self {
32 ImmutableString {
33 inner: Arc::new(inner),
34 }
35 }
36
37 /// Create a new [ImmutableString] holding the given [File] (assumed to be locked with [fs4])
38 /// and the [Mmap] mapping that file to memory.
39 ///
40 /// This function requires that the memory mapped by the given
41 /// [Mmap] is valid UTF-8 using [std::str::from_utf8].
42 #[cfg(feature = "file_memmap")]
43 pub(super) fn new_locked_file(file: File, mem_map: Mmap) -> Self {
44 Self::from_inner(ImmutableStringInner::LockedFile {
45 locked_file: file,
46 mem_map,
47 })
48 }
49
50 /// Create a new [ImmutableString] that owns a string allocated on the heap.
51 pub(super) fn new_owned(boxed_str: Box<str>) -> Self {
52 Self::from_inner(ImmutableStringInner::Owned(boxed_str))
53 }
54
55 /// Create a new [ImmutableString] referencing a string directly.
56 pub(super) fn new_static(str_ref: &'static str) -> Self {
57 Self::from_inner(ImmutableStringInner::Static(str_ref))
58 }
59
60 /// Get a list of byte indices into this [ImmutableString] of the start of every line.
61 pub fn line_starts(&self) -> impl Iterator<Item = usize> + use<'_> {
62 // Make a iterator over this string's characters and their byte indices.
63 let mut char_indices: CharIndices = self.as_ref().char_indices();
64 // Track whether the previous character was a newline using a bool -- this starts as true, so that the first
65 // character of a source is considered to be starting a newline.
66 let mut last_was_newline: bool = true;
67
68 // Create a custom iterator that flattens to give us indices immediately following \n characters.
69 let iter = std::iter::from_fn(move || {
70 // If the next char indice is none, return none. There are no lines on empty strings.
71 let (index, next) = char_indices.next()?;
72
73 // Determine whether to list this character's index as starting a new line.
74 let result = Some(last_was_newline.then_some(index));
75
76 // Update the boolean based on the consumed character.
77 last_was_newline = next == '\n';
78
79 // Return the above result.
80 result
81 });
82
83 iter.flatten()
84 }
85
86 /// Get this [ImmutableString] as a [str] reference.
87 /// This just calls [AsRef::as_ref].
88 pub fn as_str(&self) -> &str {
89 self.as_ref()
90 }
91
92 /// Get the length of this [ImmutableString] in bytes.
93 /// See [str::len].
94 pub fn len(&self) -> usize {
95 self.as_str().len()
96 }
97
98 /// Check if this [ImmutableString] is empty.
99 #[inline]
100 pub fn is_empty(&self) -> bool {
101 self.len() == 0
102 }
103}
104
105impl AsRef<str> for ImmutableString {
106 fn as_ref(&self) -> &str {
107 (*self.inner).as_ref()
108 }
109}
110
/// The internal enum representation of the immutable string.
///
/// This type is private; values are built through the constructors on [ImmutableString].
#[derive(Debug)]
enum ImmutableStringInner {
    /// An immutable reference to an existing static string.
    Static(&'static str),

    /// An owned immutable string.
    Owned(Box<str>),

    /// A locked, memory mapped file from the disk.
    #[cfg(feature = "file_memmap")]
    LockedFile {
        /// The locked file that gets unlocked when this struct is dropped.
        locked_file: File,

        /// The memory mapped file.
        ///
        /// # Safety
        /// - Undefined behavior occurs if the file on disk is modified while memory mapped. Always lock the
        ///   file (in this crate's case, using [fs4]) before creating this [Mmap] for it.
        ///   See [Mmap] for more details.
        /// - This struct assumes that any memory-mapped files have their UTF-8 validity checked by the caller.
        ///   Specifically, the [ImmutableString::as_ref] method relies on [std::str::from_utf8_unchecked],
        ///   so if you do not ensure the [Mmap] is valid UTF-8, you will run into undefined behavior.
        mem_map: Mmap,
    },
}
138
/// Implement [Drop] so that a file locked on disk is unlocked again once its string representation goes
/// out of scope/use.
#[cfg(feature = "file_memmap")]
impl Drop for ImmutableStringInner {
    fn drop(&mut self) {
        match self {
            // The in-memory variants need no cleanup here.
            ImmutableStringInner::Static(_) | ImmutableStringInner::Owned(_) => {}

            // Release the lock held on the file.
            ImmutableStringInner::LockedFile { locked_file, .. } => {
                let unlock_result: Result<_, io::Error> = FileExt::unlock(locked_file);

                // `drop` has no way to return an error, so print it to stderr and otherwise ignore it.
                if let Err(io_err) = unlock_result {
                    eprintln!("{}", io_err);
                }
            }
        }
    }
}
158
159impl AsRef<str> for ImmutableStringInner {
160 fn as_ref(&self) -> &str {
161 match self {
162 ImmutableStringInner::Static(str) => str,
163 ImmutableStringInner::Owned(str) => str,
164
165 #[cfg(feature = "file_memmap")]
166 ImmutableStringInner::LockedFile { mem_map, .. } => {
167 // Get a direct reference to the data that is in the memory map.
168 let raw_data: &[u8] = mem_map.as_ref();
169 // SAFETY: UTF-8 validity is checked when the file is added to the file map, or by the API consumer.
170 unsafe { std::str::from_utf8_unchecked(raw_data) }
171 }
172 }
173 }
174}
175
#[cfg(test)]
mod tests {
    use super::ImmutableString;

    /// Collect the line starts reported for a static source string.
    fn line_starts_of(source: &'static str) -> Vec<usize> {
        ImmutableString::new_static(source).line_starts().collect()
    }

    /// Consecutive newlines each start a (possibly empty) line of their own.
    #[test]
    fn test_line_starts() {
        let v: Vec<usize> = line_starts_of("a\n\nb\nc");

        assert_eq!(v.as_slice(), &[0, 2, 3, 5]);
    }

    /// An empty source has no lines at all.
    #[test]
    fn test_line_starts_empty() {
        assert!(line_starts_of("").is_empty());
    }

    /// A trailing newline is not followed by any character, so it does not start another line.
    #[test]
    fn test_line_starts_trailing_newline() {
        assert_eq!(line_starts_of("a\n"), vec![0]);
        assert_eq!(line_starts_of("\n"), vec![0]);
    }

    /// Line starts are byte indices, so multi-byte characters shift the following lines accordingly.
    #[test]
    fn test_line_starts_multibyte() {
        // 'é' occupies bytes 0..2, the newline is byte 2, and 'b' starts at byte 3.
        assert_eq!(line_starts_of("é\nb"), vec![0, 3]);
    }
}