wright/source_tracking/fragment.rs
1//! [Fragment] struct and implementation for dealing with fragments of source code.
2
3use super::SourceRef;
4use derive_more::Display;
5use std::{ops::Range, str::Chars, sync::Arc};
6
7#[cfg(doc)]
8use crate::source_tracking::source::Source;
9
10/// A fragment of source code.
11///
12/// This can be part of (or all of) a [Source].
13#[derive(Clone, Debug, Display)]
14#[display("{}", "self.as_str()")]
15pub struct Fragment {
16 /// The [Source] that this fragment is in.
17 pub source: SourceRef,
18
19 /// Fragments are represented using byte ranges in the [Source] referenced by [Fragment::source].
20 ///
21 /// This [Fragment] is considered invalid if this range is out of order or either end of it is not
22 /// on a char boundary in source according to [str::is_char_boundary].
23 pub range: Range<usize>,
24}
25
26impl Fragment {
27 /// Check that this [Fragment] is valid, and references a real existing (though possibly empty) part of
28 /// the [Fragment::source].
29 pub fn is_valid(&self) -> bool {
30 // Get a string reference to the whole source.
31 let source_as_str: &str = self.source.source().as_ref();
32
33 // Check validity.
34 self.range.end >= self.range.start
35 && source_as_str.is_char_boundary(self.range.start)
36 && source_as_str.is_char_boundary(self.range.end)
37 }
38
39 /// Get the [str] represented by this [Fragment].
40 ///
41 /// # Panics
42 /// - This will [panic] in the unlikely event that [Fragment::range] is out of bounds or lands between char
43 /// boundaries for [Fragment::source].
44 pub fn as_str(&self) -> &str {
45 &self.source.source().as_str()[self.range.clone()]
46 }
47
48 /// Get the length (in bytes) of this [Fragment].
49 /// Does not check this [Fragment] for validity.
50 pub const fn len(&self) -> usize {
51 self.range.end.saturating_sub(self.range.start)
52 }
53
54 /// Check if this fragment has a [`Fragment::len`] `== 0`.
55 /// Does not check this [Fragment] for validity.
56 pub const fn is_empty(&self) -> bool {
57 self.len() == 0
58 }
59
60 /// Check if this fragment is empty at the end of it's source.
61 ///
62 /// Uses [debug_assert] to check for validity.
63 pub fn is_empty_at_end_of_source(&self) -> bool {
64 debug_assert!(self.is_valid());
65
66 self.source.source().as_str().len() <= self.range.start
67 }
68
69 /// Return true if this [Fragment] entirely contains another [Fragment] and they're from the same [Source] by
70 /// [Source::id].
71 ///
72 /// If `other` is empty, it can still be considered to be contained in this [Fragment] if its
73 /// [Fragment::range] is entirely within `self`'s [Fragment::range] (basically whether the location of the empty
74 /// fragment is in this one).
75 pub fn contains(&self, other: &Self) -> bool {
76 self.source.id == other.source.id
77 && self.range.start <= other.range.start
78 && self.range.end >= other.range.end
79 }
80
81 /// Get the number of bytes between the beginning of `origin` and the beginning of `self`.
82 ///
83 /// # Panics:
84 /// - Panics if `self` is not a [Fragment] within `origin` according to [`Fragment::contains`].
85 pub fn offset_from(&self, origin: &Self) -> usize {
86 if !origin.contains(self) {
87 panic!("This fragment must be contained in the original fragment");
88 }
89
90 self.range.start - origin.range.start
91 }
92
93 /// Get a [Chars] [Iterator] over the [char]acters in this [Fragment].
94 pub fn chars(&self) -> Chars<'_> {
95 self.as_str().chars()
96 }
97
98 /// Get a sub-fragment of this fragment (see [Fragment::contains]) with the whitespace at either end trimmed off.
99 /// This will return the fragment unchanged if it is empty.
100 ///
101 /// This calls [Fragment::trim_start] and then [Fragment::trim_end] internally and should match the behavior of
102 /// [str::trim].
103 ///
104 /// If this returns an empty [Fragment] it will be at the end of the parent [Fragment].
105 pub fn trimmed(self) -> Self {
106 self.trim_start().trim_end()
107 }
108
109 /// Get a sub-fragment of this fragment (see [Fragment::contains]) with the whitespace trimmed off the end.
110 /// This will return it unchanged if empty.
111 ///
112 /// See [str::trim_end] for exact behaviors.
113 pub fn trim_end(mut self) -> Self {
114 // Get the string representation of this fragment.
115 let original_str: &str = self.as_str();
116 // Trim it.
117 let trimmed_str: &str = original_str.trim_end();
118 // Calculate the new end of the range.
119 let new_end: usize = self.range.start + trimmed_str.len();
120 // Update self.
121 self.range = self.range.start..new_end;
122 // Return the updated self.
123 self
124 }
125
126 /// Get a sub-fragment of this fragment (see [Fragment::contains]) with the whitespace trimmed off the start.
127 /// This will return it unchanged if empty.
128 ///
129 /// See [str::trim_start] for exact behaviors.
130 pub fn trim_start(mut self) -> Self {
131 // Get the string representation of this fragment.
132 let original_str: &str = self.as_str();
133 // Trim it.
134 let trimmed_str: &str = original_str.trim_start();
135 // Calculate the new start of the range.
136 let new_start: usize = self.range.end - trimmed_str.len();
137 // Update self.
138 self.range = new_start..self.range.end;
139 // Return the updated self.
140 self
141 }
142
143 /// Split this [Fragment] into two sub-[Fragment]s, the left containing the first `bytes_from_start`
144 /// bytes, and the right containing the rest.
145 ///
146 /// # Panics
147 /// - This will panic if the provided `bytes_from_start` does not land on a unicode character boundary or is larger
148 /// than the length of this fragment according to [str::is_char_boundary].
149 pub fn split_at(&self, bytes_from_start: usize) -> (Self, Self) {
150 // Check boundaries.
151 if !self.as_str().is_char_boundary(bytes_from_start) {
152 panic!("Cannot split in the middle of a unicode character");
153 }
154
155 self.split_at_unchecked(bytes_from_start)
156 }
157
158 /// This is the same as [Fragment::split_at] except it does not check that the created fragments are valid or
159 /// that either can call [Fragment::as_str] without panicking.
160 /// Use with caution.
161 ///
162 /// Note that this is not technically `unsafe`, since all bugs that may emerge from abuse/misuse here are logic
163 /// bugs (not memory or concurrency bugs).
164 pub fn split_at_unchecked(&self, bytes_from_start: usize) -> (Self, Self) {
165 // Calculate ranges.
166 let left_range: Range<usize> = self.range.start..(self.range.start + bytes_from_start);
167 let right_range: Range<usize> = (self.range.start + bytes_from_start)..self.range.end;
168
169 // Construct fragments.
170 (
171 Fragment {
172 source: self.source.clone(),
173 range: left_range,
174 },
175 Fragment {
176 source: self.source.clone(),
177 range: right_range,
178 },
179 )
180 }
181
182 /// Move the start of this [Fragment] forward by a given number of bytes.
183 ///
184 /// # Panics
185 /// - Panics if the advancing by `bytes` would create an invalid [Fragment].
186 pub fn advance_by(&mut self, bytes: usize) {
187 // Bounds check.
188 if !self.as_str().is_char_boundary(bytes) {
189 panic!("Advancing by {bytes} bytes would create an invalid fragment.");
190 }
191
192 self.advance_by_unchecked(bytes);
193 }
194
195 /// This is the same as [Fragment::advance_by] except without the bounds checking. Use carefully or the updated
196 /// [Fragment] will be invalid.
197 #[inline]
198 pub fn advance_by_unchecked(&mut self, bytes: usize) {
199 self.range.start += bytes;
200 }
201
202 /// Retain up to `bytes` bytes of this [Fragment].
203 ///
204 /// # Panics
205 /// - Panics if the updated [Fragment] would be invalid.
206 pub fn retain(&mut self, bytes: usize) {
207 // Bounds check.
208 if !self.as_str().is_char_boundary(bytes) {
209 panic!("Retaining to {bytes} bytes would create an invalid fragment.");
210 }
211
212 self.retain_unchecked(bytes);
213 }
214
215 /// This is the same as [Fragment::retain] except without the bounds checking. Use carefully or the updated
216 /// [Fragment] will be invalid.
217 #[inline]
218 pub fn retain_unchecked(&mut self, bytes: usize) {
219 self.range.end = self.range.start + bytes;
220 }
221
222 /// Get a [Range] of line indices (0-indexed, see [Source::get_line]) that this fragment overlaps.
223 pub fn line_indices(&self) -> Range<usize> {
224 let start_line_index: usize = self.source.line_index(self.range.start);
225
226 // Subtract one when doing the end because if this fragment ends at the end of a line, we don't want to include
227 // the next line (obo -- range is exclusive).
228 let ending_line_index: usize = self.source.line_index(self.range.end - 1);
229
230 // Return the range.
231 start_line_index..ending_line_index
232 }
233
234 /// Get the line number (not index) that this line starts on.
235 ///
236 /// This re-calculates [Fragment::line_indices], which may be expensive on very large files, so use with care.
237 pub fn starts_on_line(&self) -> usize {
238 self.line_indices().start + 1
239 }
240
241 /// Get the number of bytes between the start of the line that this [Fragment] starts on and the start of this
242 /// [Fragment]
243 pub fn starting_col_index(&self) -> usize {
244 let line_start_index = Arc::clone(&self.source)
245 .get_line(self.line_indices().start)
246 .range
247 .start;
248
249 self.range.start - line_start_index
250 }
251
252 /// Compute the "cover" over two [Fragment]s -- that is, the [Fragment] containing both of them and all the source
253 /// code in between.
254 ///
255 /// # Panics
256 /// - If the [Fragment]s are from different sources.
257 pub fn cover(lhs: Fragment, rhs: Fragment) -> Fragment {
258 use std::cmp;
259 assert_eq!(lhs.source.id, rhs.source.id, "fragments must be from same source");
260
261 Fragment {
262 source: lhs.source,
263 range: cmp::min(lhs.range.start, rhs.range.start)
264 ..cmp::max(lhs.range.end, rhs.range.end),
265 }
266 }
267}
268
269impl PartialEq for Fragment {
270 /// Fragment equality is based on referencing the same [Source] using [Arc::ptr_eq] and having the same
271 /// [Fragment::range].
272 fn eq(&self, other: &Self) -> bool {
273 self.source.id == other.source.id && self.range == other.range
274 }
275}
276
277impl Eq for Fragment {}
278
#[cfg(test)]
mod tests {
    use super::Fragment;
    use crate::source_tracking::{filename::FileName, source::Source};
    use std::sync::Arc;

    /// Build a throwaway [Fragment] spanning the whole of a static string.
    fn from_static(s: &'static str) -> Fragment {
        let source = Arc::new(Source::new_from_static_str(FileName::None, s));

        // Measure the source text up front so the `Arc` can be moved into the fragment afterwards.
        let text: &str = source.source().as_ref();
        let byte_len: usize = text.len();

        Fragment {
            source,
            range: 0..byte_len,
        }
    }

    /// Splitting a one-byte fragment after its only byte leaves an empty right half.
    #[test]
    fn test_split_single() {
        let plus = from_static("+");
        let halves = plus.split_at(1);
        assert_eq!(halves.0.as_str(), "+");
        assert_eq!(halves.1.as_str(), "");
    }

    /// Offsets of both halves of a split are measured from the start of the parent.
    #[test]
    fn test_offset_from() {
        let whole = from_static("abcde");
        let (head, tail) = whole.split_at(2);
        assert_eq!(head.offset_from(&whole), 0);
        assert_eq!(tail.offset_from(&whole), 2);
    }

    /// Taking the offset between fragments of unrelated sources must panic.
    #[test]
    #[should_panic]
    fn test_offset_panics() {
        let first = from_static("abc");
        let second = from_static("def");
        first.offset_from(&second);
    }

    /// Trimming produces a sub-fragment of the original with the expected length.
    #[test]
    fn test_trimmed_is_contained() {
        let padded = from_static("  aa aa  ");
        let inner = padded.clone().trimmed();
        assert!(padded.contains(&inner));
        assert_eq!(inner.len(), 5);
    }

    /// Trimming an empty fragment leaves it unchanged.
    #[test]
    fn trimmed_empty() {
        let empty = from_static("");
        let trimmed = empty.clone().trimmed();
        assert_eq!(trimmed, empty);
    }

    /// Trimming an all-whitespace fragment leaves nothing behind.
    #[test]
    fn trimmed_whitespace() {
        let blank = from_static("  ");
        let trimmed = blank.clone().trimmed();
        assert!(trimmed.is_empty());
    }
}