wright/source_tracking/
immutable_string.rs

1//! Structure and implementation relating to the representation of source files (as immutable strings) throughout
2//! the Wright compiler and tooling.
3
4use std::{str::CharIndices, sync::Arc};
5
6#[cfg(feature = "file_memmap")]
7use fs4::fs_std::FileExt;
8
9#[cfg(feature = "file_memmap")]
10use memmap2::Mmap;
11
12#[cfg(feature = "file_memmap")]
13use std::{fs::File, io};
14
15/// An immutable string that either
16/// - References a source string in memory using a `'static` reference,
17/// - Owns a source string in memory.
18/// - Owns a locked and memory mapped file from the disk.
19///
20/// This uses an [Arc] internally to make cloning cheap.
21#[derive(Debug, Clone)]
22pub struct ImmutableString {
23    /// Wrap the internal enum representation. This is to avoid exposing the API for a user to construct an
24    /// [ImmutableStringInner] without satisfying certain invariants.
25    inner: Arc<ImmutableStringInner>,
26}
27
28impl ImmutableString {
29    /// Wrap the inner representation in this type.
30    #[inline]
31    fn from_inner(inner: ImmutableStringInner) -> Self {
32        ImmutableString {
33            inner: Arc::new(inner),
34        }
35    }
36
37    /// Create a new [ImmutableString] holding the given [File] (assumed to be locked with [fs4])
38    /// and the [Mmap] mapping that file to memory.
39    ///
40    /// This function requires that the memory mapped by the given
41    /// [Mmap] is valid UTF-8 using [std::str::from_utf8].
42    #[cfg(feature = "file_memmap")]
43    pub(super) fn new_locked_file(file: File, mem_map: Mmap) -> Self {
44        Self::from_inner(ImmutableStringInner::LockedFile {
45            locked_file: file,
46            mem_map,
47        })
48    }
49
50    /// Create a new [ImmutableString] that owns a string allocated on the heap.
51    pub(super) fn new_owned(boxed_str: Box<str>) -> Self {
52        Self::from_inner(ImmutableStringInner::Owned(boxed_str))
53    }
54
55    /// Create a new [ImmutableString] referencing a string directly.
56    pub(super) fn new_static(str_ref: &'static str) -> Self {
57        Self::from_inner(ImmutableStringInner::Static(str_ref))
58    }
59
60    /// Get a list of byte indices into this [ImmutableString] of the start of every line.
61    pub fn line_starts(&self) -> impl Iterator<Item = usize> + use<'_> {
62        // Make a iterator over this string's characters and their byte indices.
63        let mut char_indices: CharIndices = self.as_ref().char_indices();
64        // Track whether the previous character was a newline using a bool -- this starts as true, so that the first
65        // character of a source is considered to be starting a newline.
66        let mut last_was_newline: bool = true;
67
68        // Create a custom iterator that flattens to give us indices immediately following \n characters.
69        let iter = std::iter::from_fn(move || {
70            // If the next char indice is none, return none. There are no lines on empty strings.
71            let (index, next) = char_indices.next()?;
72
73            // Determine whether to list this character's index as starting a new line.
74            let result = Some(last_was_newline.then_some(index));
75
76            // Update the boolean based on the consumed character.
77            last_was_newline = next == '\n';
78
79            // Return the above result.
80            result
81        });
82
83        iter.flatten()
84    }
85
86    /// Get this [ImmutableString] as a [str] reference.
87    /// This just calls [AsRef::as_ref].
88    pub fn as_str(&self) -> &str {
89        self.as_ref()
90    }
91
92    /// Get the length of this [ImmutableString] in bytes.
93    /// See [str::len].
94    pub fn len(&self) -> usize {
95        self.as_str().len()
96    }
97
98    /// Check if this [ImmutableString] is empty.
99    #[inline]
100    pub fn is_empty(&self) -> bool {
101        self.len() == 0
102    }
103}
104
105impl AsRef<str> for ImmutableString {
106    fn as_ref(&self) -> &str {
107        (*self.inner).as_ref()
108    }
109}
110
/// The variants an [ImmutableString] can hold internally.
#[derive(Debug)]
enum ImmutableStringInner {
    /// A borrowed string with the `'static` lifetime.
    Static(&'static str),

    /// A heap allocated string owned by this value.
    Owned(Box<str>),

    /// A file from the disk that has been locked and mapped into memory.
    #[cfg(feature = "file_memmap")]
    LockedFile {
        /// The locked file. It is unlocked again when this value is dropped.
        locked_file: File,

        /// The memory mapping of the locked file.
        ///
        /// # Safety
        /// - Undefined behavior occurs if the file on disk is modified while it is memory mapped. Always lock
        ///   the file (in this crate's case, using [fs4]) before creating this [Mmap] for it.
        ///   See [Mmap] for more details.
        /// - The caller is responsible for checking that the mapped bytes are valid UTF-8. Specifically, the
        ///   [AsRef] implementation relies on [std::str::from_utf8_unchecked], so a mapping that is not valid
        ///   UTF-8 leads to undefined behavior.
        mem_map: Mmap,
    },
}
138
/// Release the lock on a file from the disk once the value holding it goes out of scope/use.
#[cfg(feature = "file_memmap")]
impl Drop for ImmutableStringInner {
    fn drop(&mut self) {
        match self {
            // Nothing extra to do here: these variants hold no file lock.
            Self::Static(_) | Self::Owned(_) => {}

            // A locked file has to be unlocked explicitly.
            Self::LockedFile { locked_file, .. } => {
                let unlock_result: io::Result<()> = FileExt::unlock(locked_file);

                // A failure cannot be propagated out of `drop`, so it is reported on stderr and then
                // otherwise ignored.
                if let Err(io_err) = unlock_result {
                    eprintln!("{}", io_err);
                }
            }
        }
    }
}
158
159impl AsRef<str> for ImmutableStringInner {
160    fn as_ref(&self) -> &str {
161        match self {
162            ImmutableStringInner::Static(str) => str,
163            ImmutableStringInner::Owned(str) => str,
164
165            #[cfg(feature = "file_memmap")]
166            ImmutableStringInner::LockedFile { mem_map, .. } => {
167                // Get a direct reference to the data that is in the memory map.
168                let raw_data: &[u8] = mem_map.as_ref();
169                // SAFETY: UTF-8 validity is checked when the file is added to the file map, or by the API consumer.
170                unsafe { std::str::from_utf8_unchecked(raw_data) }
171            }
172        }
173    }
174}
175
#[cfg(test)]
mod tests {
    use super::ImmutableString;

    /// Collect the line starts reported for a static source string.
    fn starts(source: &'static str) -> Vec<usize> {
        ImmutableString::new_static(source).line_starts().collect()
    }

    /// Interior newlines, including an empty line between two of them.
    #[test]
    fn test_line_starts() {
        assert_eq!(starts("a\n\nb\nc"), [0, 2, 3, 5]);
    }

    /// An empty source has no lines at all.
    #[test]
    fn test_line_starts_empty() {
        assert!(starts("").is_empty());
    }

    /// A newline that ends the source does not open another line after it.
    #[test]
    fn test_line_starts_trailing_newline() {
        assert_eq!(starts("a\n"), [0]);
        assert_eq!(starts("a\nb\n"), [0, 2]);
    }

    /// A newline character can itself be the first character of a line.
    #[test]
    fn test_line_starts_consecutive_newlines() {
        assert_eq!(starts("\n"), [0]);
        assert_eq!(starts("\n\nx"), [0, 1, 2]);
    }

    /// Indices are byte offsets, so multi-byte characters shift later line starts by more than one.
    #[test]
    fn test_line_starts_multibyte() {
        // 'é' is two bytes, so the '\n' sits at byte 2 and 'b' starts at byte 3.
        assert_eq!(starts("é\nb"), [0, 3]);
    }
}
188}