wright/source_tracking/
fragment.rs

1//! [Fragment] struct and implementation for dealing with fragments of source code.
2
3use super::SourceRef;
4use derive_more::Display;
5use std::{ops::Range, str::Chars, sync::Arc};
6
7#[cfg(doc)]
8use crate::source_tracking::source::Source;
9
10/// A fragment of source code.
11///
12/// This can be part of (or all of) a [Source].
13#[derive(Clone, Debug, Display)]
14#[display("{}", "self.as_str()")]
15pub struct Fragment {
16    /// The [Source] that this fragment is in.
17    pub source: SourceRef,
18
19    /// Fragments are represented using byte ranges in the [Source] referenced by [Fragment::source].
20    ///
21    /// This [Fragment] is considered invalid if this range is out of order or either end of it is not
22    /// on a char boundary in source according to [str::is_char_boundary].
23    pub range: Range<usize>,
24}
25
26impl Fragment {
27    /// Check that this [Fragment] is valid, and references a real existing (though possibly empty) part of
28    /// the [Fragment::source].
29    pub fn is_valid(&self) -> bool {
30        // Get a string reference to the whole source.
31        let source_as_str: &str = self.source.source().as_ref();
32
33        // Check validity.
34        self.range.end >= self.range.start
35            && source_as_str.is_char_boundary(self.range.start)
36            && source_as_str.is_char_boundary(self.range.end)
37    }
38
39    /// Get the [str] represented by this [Fragment].
40    ///
41    /// # Panics
42    /// - This will [panic] in the unlikely event that [Fragment::range] is out of bounds or lands between char
43    ///     boundaries for [Fragment::source].
44    pub fn as_str(&self) -> &str {
45        &self.source.source().as_str()[self.range.clone()]
46    }
47
48    /// Get the length (in bytes) of this [Fragment].
49    /// Does not check this [Fragment] for validity.
50    pub const fn len(&self) -> usize {
51        self.range.end.saturating_sub(self.range.start)
52    }
53
54    /// Check if this fragment has a [`Fragment::len`] `== 0`.
55    /// Does not check this [Fragment] for validity.
56    pub const fn is_empty(&self) -> bool {
57        self.len() == 0
58    }
59
60    /// Check if this fragment is empty at the end of it's source.
61    ///
62    /// Uses [debug_assert] to check for validity.
63    pub fn is_empty_at_end_of_source(&self) -> bool {
64        debug_assert!(self.is_valid());
65
66        self.source.source().as_str().len() <= self.range.start
67    }
68
69    /// Return true if this [Fragment] entirely contains another [Fragment] and they're from the same [Source] by
70    /// [Source::id].
71    ///
72    /// If `other` is empty, it can still be considered to be contained in this [Fragment] if its
73    /// [Fragment::range] is entirely within `self`'s [Fragment::range] (basically whether the location of the empty
74    /// fragment is in this one).
75    pub fn contains(&self, other: &Self) -> bool {
76        self.source.id == other.source.id
77            && self.range.start <= other.range.start
78            && self.range.end >= other.range.end
79    }
80
81    /// Get the number of bytes between the beginning of `origin` and the beginning of `self`.
82    ///
83    /// # Panics:
84    /// - Panics if `self` is not a [Fragment] within `origin` according to [`Fragment::contains`].
85    pub fn offset_from(&self, origin: &Self) -> usize {
86        if !origin.contains(self) {
87            panic!("This fragment must be contained in the original fragment");
88        }
89
90        self.range.start - origin.range.start
91    }
92
93    /// Get a [Chars] [Iterator] over the [char]acters in this [Fragment].
94    pub fn chars(&self) -> Chars<'_> {
95        self.as_str().chars()
96    }
97
98    /// Get a sub-fragment of this fragment (see [Fragment::contains]) with the whitespace at either end trimmed off.
99    /// This will return the fragment unchanged if it is empty.
100    ///
101    /// This calls [Fragment::trim_start] and then [Fragment::trim_end] internally and should match the behavior of
102    /// [str::trim].
103    ///
104    /// If this returns an empty [Fragment] it will be at the end of the parent [Fragment].
105    pub fn trimmed(self) -> Self {
106        self.trim_start().trim_end()
107    }
108
109    /// Get a sub-fragment of this fragment (see [Fragment::contains]) with the whitespace trimmed off the end.
110    /// This will return it unchanged if empty.
111    ///
112    /// See [str::trim_end] for exact behaviors.
113    pub fn trim_end(mut self) -> Self {
114        // Get the string representation of this fragment.
115        let original_str: &str = self.as_str();
116        // Trim it.
117        let trimmed_str: &str = original_str.trim_end();
118        // Calculate the new end of the range.
119        let new_end: usize = self.range.start + trimmed_str.len();
120        // Update self.
121        self.range = self.range.start..new_end;
122        // Return the updated self.
123        self
124    }
125
126    /// Get a sub-fragment of this fragment (see [Fragment::contains]) with the whitespace trimmed off the start.
127    /// This will return it unchanged if empty.
128    ///
129    /// See [str::trim_start] for exact behaviors.
130    pub fn trim_start(mut self) -> Self {
131        // Get the string representation of this fragment.
132        let original_str: &str = self.as_str();
133        // Trim it.
134        let trimmed_str: &str = original_str.trim_start();
135        // Calculate the new start of the range.
136        let new_start: usize = self.range.end - trimmed_str.len();
137        // Update self.
138        self.range = new_start..self.range.end;
139        // Return the updated self.
140        self
141    }
142
143    /// Split this [Fragment] into two sub-[Fragment]s, the left containing the first `bytes_from_start`
144    /// bytes, and the right containing the rest.
145    ///
146    /// # Panics
147    /// - This will panic if the provided `bytes_from_start` does not land on a unicode character boundary or is larger
148    ///     than the length of this fragment according to [str::is_char_boundary].
149    pub fn split_at(&self, bytes_from_start: usize) -> (Self, Self) {
150        // Check boundaries.
151        if !self.as_str().is_char_boundary(bytes_from_start) {
152            panic!("Cannot split in the middle of a unicode character");
153        }
154
155        self.split_at_unchecked(bytes_from_start)
156    }
157
158    /// This is the same as [Fragment::split_at] except it does not check that the created fragments are valid or
159    /// that either can call [Fragment::as_str] without panicking.
160    /// Use with caution.
161    ///
162    /// Note that this is not technically `unsafe`, since all bugs that may emerge from abuse/misuse here are logic
163    /// bugs (not memory or concurrency bugs).
164    pub fn split_at_unchecked(&self, bytes_from_start: usize) -> (Self, Self) {
165        // Calculate ranges.
166        let left_range: Range<usize> = self.range.start..(self.range.start + bytes_from_start);
167        let right_range: Range<usize> = (self.range.start + bytes_from_start)..self.range.end;
168
169        // Construct fragments.
170        (
171            Fragment {
172                source: self.source.clone(),
173                range: left_range,
174            },
175            Fragment {
176                source: self.source.clone(),
177                range: right_range,
178            },
179        )
180    }
181
182    /// Move the start of this [Fragment] forward by a given number of bytes.
183    ///
184    /// # Panics
185    /// - Panics if the advancing by `bytes` would create an invalid [Fragment].
186    pub fn advance_by(&mut self, bytes: usize) {
187        // Bounds check.
188        if !self.as_str().is_char_boundary(bytes) {
189            panic!("Advancing by {bytes} bytes would create an invalid fragment.");
190        }
191
192        self.advance_by_unchecked(bytes);
193    }
194
195    /// This is the same as [Fragment::advance_by] except without the bounds checking. Use carefully or the updated
196    /// [Fragment] will be invalid.
197    #[inline]
198    pub fn advance_by_unchecked(&mut self, bytes: usize) {
199        self.range.start += bytes;
200    }
201
202    /// Retain up to `bytes` bytes of this [Fragment].
203    ///
204    /// # Panics
205    /// - Panics if the updated [Fragment] would be invalid.
206    pub fn retain(&mut self, bytes: usize) {
207        // Bounds check.
208        if !self.as_str().is_char_boundary(bytes) {
209            panic!("Retaining to {bytes} bytes would create an invalid fragment.");
210        }
211
212        self.retain_unchecked(bytes);
213    }
214
215    /// This is the same as [Fragment::retain] except without the bounds checking. Use carefully or the updated
216    /// [Fragment] will be invalid.
217    #[inline]
218    pub fn retain_unchecked(&mut self, bytes: usize) {
219        self.range.end = self.range.start + bytes;
220    }
221
222    /// Get a [Range] of line indices (0-indexed, see [Source::get_line]) that this fragment overlaps.
223    pub fn line_indices(&self) -> Range<usize> {
224        let start_line_index: usize = self.source.line_index(self.range.start);
225
226        // Subtract one when doing the end because if this fragment ends at the end of a line, we don't want to include
227        // the next line (obo -- range is exclusive).
228        let ending_line_index: usize = self.source.line_index(self.range.end - 1);
229
230        // Return the range.
231        start_line_index..ending_line_index
232    }
233
234    /// Get the line number (not index) that this line starts on.
235    ///
236    /// This re-calculates [Fragment::line_indices], which may be expensive on very large files, so use with care.
237    pub fn starts_on_line(&self) -> usize {
238        self.line_indices().start + 1
239    }
240
241    /// Get the number of bytes between the start of the line that this [Fragment] starts on and the start of this
242    /// [Fragment]
243    pub fn starting_col_index(&self) -> usize {
244        let line_start_index = Arc::clone(&self.source)
245            .get_line(self.line_indices().start)
246            .range
247            .start;
248
249        self.range.start - line_start_index
250    }
251
252    /// Compute the "cover" over two [Fragment]s -- that is, the [Fragment] containing both of them and all the source
253    /// code in between.
254    ///
255    /// # Panics
256    /// - If the [Fragment]s are from different sources.
257    pub fn cover(lhs: Fragment, rhs: Fragment) -> Fragment {
258        use std::cmp;
259        assert_eq!(lhs.source.id, rhs.source.id, "fragments must be from same source");
260
261        Fragment {
262            source: lhs.source,
263            range: cmp::min(lhs.range.start, rhs.range.start)
264                ..cmp::max(lhs.range.end, rhs.range.end),
265        }
266    }
267}
268
269impl PartialEq for Fragment {
270    /// Fragment equality is based on referencing the same [Source] using [Arc::ptr_eq] and having the same
271    /// [Fragment::range].
272    fn eq(&self, other: &Self) -> bool {
273        self.source.id == other.source.id && self.range == other.range
274    }
275}
276
277impl Eq for Fragment {}
278
#[cfg(test)]
mod tests {
    use super::Fragment;
    use crate::source_tracking::{filename::FileName, source::Source};
    use std::sync::Arc;

    /// Build a [Fragment] spanning the whole of a throwaway [Source] made from a static string.
    fn from_static(s: &'static str) -> Fragment {
        let source_ref = Arc::new(Source::new_from_static_str(FileName::None, s));

        Fragment {
            range: 0..source_ref.source().as_ref().len(),
            source: source_ref,
        }
    }

    /// Splitting a one-byte fragment after its only byte leaves an empty right half.
    #[test]
    fn test_split_single() {
        let plus = from_static("+");
        let (lhs, rhs) = plus.split_at(1);
        assert_eq!(lhs.as_str(), "+");
        assert_eq!(rhs.as_str(), "");
    }

    /// Offsets of both halves of a split are measured from the start of the parent.
    #[test]
    fn test_offset_from() {
        let parent = from_static("abcde");
        let (head, tail) = parent.split_at(2);
        assert_eq!(head.offset_from(&parent), 0);
        assert_eq!(tail.offset_from(&parent), 2);
    }

    /// Asking for an offset relative to a fragment of another source panics.
    #[test]
    #[should_panic]
    fn test_offset_panics() {
        let first = from_static("abc");
        let second = from_static("def");
        first.offset_from(&second);
    }

    /// A trimmed fragment stays inside its parent and drops only the outer whitespace.
    #[test]
    fn test_trimmed_is_contained() {
        let padded = from_static("  aa aa  ");
        let inner = padded.clone().trimmed();
        assert!(padded.contains(&inner));
        assert_eq!(inner.len(), 5);
    }

    /// Trimming an empty fragment gives back an equal fragment.
    #[test]
    fn trimmed_empty() {
        let nothing = from_static("");
        assert_eq!(nothing.clone().trimmed(), nothing);
    }

    /// Trimming a whitespace-only fragment gives an empty fragment.
    #[test]
    fn trimmed_whitespace() {
        let blank = from_static("  ");
        assert!(blank.clone().trimmed().is_empty());
    }
}
339}