codespan_reporting/
files.rs

//! Source file support for diagnostic reporting.
//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
//! with the [`term::emit`] function.
//!
//! Simple implementations of this trait are provided:
//!
//! - [`SimpleFile`]: For single-file use-cases
//! - [`SimpleFiles`]: For multi-file use-cases
//!
//! These data structures provide a pretty minimal API, however,
//! so end-users are encouraged to create their own implementations for their
//! own specific use-cases, such as an implementation that accesses the file
//! system directly (and caches the line start locations), or an implementation
//! using an incremental compilation library like [`salsa`].
//!
//! [`term::emit`]: crate::term::emit
//! [`Diagnostics`]: crate::diagnostic::Diagnostic
//! [`Files`]: Files
//! [`SimpleFile`]: SimpleFile
//! [`SimpleFiles`]: SimpleFiles
//!
//! [`salsa`]: https://crates.io/crates/salsa
25
26use std::ops::Range;
27
/// An error that happened while looking up a file or a piece of content in that file.
///
/// The enum is `#[non_exhaustive]`, so matches on it outside this crate need a
/// wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge {
        /// The byte index that was asked for.
        given: usize,
        /// The maximum byte index, as reported in the error message.
        max: usize,
    },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge {
        /// The line index that was asked for.
        given: usize,
        /// The maximum line index, as reported in the error message.
        max: usize,
    },
    /// The file is present and contains the specified line index, but the line does not
    /// contain the specified column index.
    ColumnTooLarge {
        /// The column index that was asked for.
        given: usize,
        /// The maximum column index, as reported in the error message.
        max: usize,
    },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary {
        /// The byte index that was asked for.
        given: usize,
    },
    /// There was an error while doing IO.
    Io(std::io::Error),
}
45
46impl From<std::io::Error> for Error {
47    fn from(err: std::io::Error) -> Error {
48        Error::Io(err)
49    }
50}
51
52impl std::fmt::Display for Error {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            Error::FileMissing => write!(f, "file missing"),
56            Error::IndexTooLarge { given, max } => {
57                write!(f, "invalid index {}, maximum index is {}", given, max)
58            }
59            Error::LineTooLarge { given, max } => {
60                write!(f, "invalid line {}, maximum line is {}", given, max)
61            }
62            Error::ColumnTooLarge { given, max } => {
63                write!(f, "invalid column {}, maximum column {}", given, max)
64            }
65            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
66            Error::Io(err) => write!(f, "{}", err),
67        }
68    }
69}
70
71impl std::error::Error for Error {
72    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
73        match &self {
74            Error::Io(err) => Some(err),
75            _ => None,
76        }
77    }
78}
79
80/// A minimal interface for accessing source files when rendering diagnostics.
81///
82/// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference.
83/// This is to workaround the lack of higher kinded lifetime parameters.
84/// This can be ignored if this is not needed, however.
85pub trait Files<'a> {
86    /// A unique identifier for files in the file provider. This will be used
87    /// for rendering `diagnostic::Label`s in the corresponding source files.
88    type FileId: 'a + Copy + PartialEq;
89    /// The user-facing name of a file, to be displayed in diagnostics.
90    type Name: 'a + std::fmt::Display;
91    /// The source code of a file.
92    type Source: 'a + AsRef<str>;
93
94    /// The user-facing name of a file.
95    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;
96
97    /// The source code of a file.
98    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;
99
100    /// The index of the line at the given byte index.
101    /// If the byte index is past the end of the file, returns the maximum line index in the file.
102    /// This means that this function only fails if the file is not present.
103    ///
104    /// # Note for trait implementors
105    ///
106    /// This can be implemented efficiently by performing a binary search over
107    /// a list of line starts that was computed by calling the [`line_starts`]
108    /// function that is exported from the [`files`] module. It might be useful
109    /// to pre-compute and cache these line starts.
110    ///
111    /// [`line_starts`]: crate::files::line_starts
112    /// [`files`]: crate::files
113    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;
114
115    /// The user-facing line number at the given line index.
116    /// It is not necessarily checked that the specified line index
117    /// is actually in the file.
118    ///
119    /// # Note for trait implementors
120    ///
121    /// This is usually 1-indexed from the beginning of the file, but
122    /// can be useful for implementing something like the
123    /// [C preprocessor's `#line` macro][line-macro].
124    ///
125    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
126    #[allow(unused_variables)]
127    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
128        Ok(line_index + 1)
129    }
130
131    /// The user-facing column number at the given line index and byte index.
132    ///
133    /// # Note for trait implementors
134    ///
135    /// This is usually 1-indexed from the the start of the line.
136    /// A default implementation is provided, based on the [`column_index`]
137    /// function that is exported from the [`files`] module.
138    ///
139    /// [`files`]: crate::files
140    /// [`column_index`]: crate::files::column_index
141    fn column_number(
142        &'a self,
143        id: Self::FileId,
144        line_index: usize,
145        byte_index: usize,
146    ) -> Result<usize, Error> {
147        let source = self.source(id)?;
148        let line_range = self.line_range(id, line_index)?;
149        let column_index = column_index(source.as_ref(), line_range, byte_index);
150
151        Ok(column_index + 1)
152    }
153
154    /// Convenience method for returning line and column number at the given
155    /// byte index in the file.
156    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
157        let line_index = self.line_index(id, byte_index)?;
158
159        Ok(Location {
160            line_number: self.line_number(id, line_index)?,
161            column_number: self.column_number(id, line_index, byte_index)?,
162        })
163    }
164
165    /// The byte range of line in the source of the file.
166    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
167}
168
/// A user-facing location (line and column number) in a source file.
///
/// This is the value returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Location {
    /// The line number, as it should be shown to the user.
    pub line_number: usize,
    /// The column number, as it should be shown to the user.
    pub column_number: usize,
}
181
/// The column index at the given byte index in the source file.
/// This is the number of characters to the given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// A byte index that falls inside a multi-byte character yields the column of
/// that character, as the example below shows.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// // Line 2 holds three characters that are 4, 3 and 4 bytes long in UTF-8:
/// // U+1F5FB (MOUNT FUJI), U+2208 (ELEMENT OF), U+1F30F (EARTH GLOBE ASIA-AUSTRALIA).
/// let source = "\n\n\u{1F5FB}\u{2208}\u{1F30F}\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never look past the end of the line or the end of the source.
    let end_index = byte_index.min(line_range.end).min(source.len());

    // A character is counted once the byte *after* it is a boundary, i.e. once it
    // has been passed completely. `index + 1 <= end_index <= source.len()`, so the
    // boundary check always stays within the source.
    (line_range.start..end_index)
        .filter(|&index| source.is_char_boundary(index + 1))
        .count()
}
213
/// Return the starting byte index of each line in the source string.
///
/// The first line always starts at byte `0`; every further line starts on the
/// byte that follows a `\n`.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // Byte index directly after each newline, i.e. the start of every line but the first.
    let starts_after_newlines = source
        .match_indices('\n')
        .map(|(newline_index, _)| newline_index + 1);

    std::iter::once(0).chain(starts_after_newlines)
}
254
/// A file database that contains a single source file.
///
/// Because there is only a single file in this database, `()` is used as the [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Debug, Clone)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The starting byte index of each line in the source code.
    line_starts: Vec<usize>,
}
272
273impl<Name, Source> SimpleFile<Name, Source>
274where
275    Name: std::fmt::Display,
276    Source: AsRef<str>,
277{
278    /// Create a new source file.
279    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
280        SimpleFile {
281            name,
282            line_starts: line_starts(source.as_ref()).collect(),
283            source,
284        }
285    }
286
287    /// Return the name of the file.
288    pub fn name(&self) -> &Name {
289        &self.name
290    }
291
292    /// Return the source of the file.
293    pub fn source(&self) -> &Source {
294        &self.source
295    }
296
297    /// Return the starting byte index of the line with the specified line index.
298    /// Convenience method that already generates errors if necessary.
299    fn line_start(&self, line_index: usize) -> Result<usize, Error> {
300        use std::cmp::Ordering;
301
302        match line_index.cmp(&self.line_starts.len()) {
303            Ordering::Less => Ok(self
304                .line_starts
305                .get(line_index)
306                .cloned()
307                .expect("failed despite previous check")),
308            Ordering::Equal => Ok(self.source.as_ref().len()),
309            Ordering::Greater => Err(Error::LineTooLarge {
310                given: line_index,
311                max: self.line_starts.len() - 1,
312            }),
313        }
314    }
315}
316
317impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
318where
319    Name: 'a + std::fmt::Display + Clone,
320    Source: 'a + AsRef<str>,
321{
322    type FileId = ();
323    type Name = Name;
324    type Source = &'a str;
325
326    fn name(&self, (): ()) -> Result<Name, Error> {
327        Ok(self.name.clone())
328    }
329
330    fn source(&self, (): ()) -> Result<&str, Error> {
331        Ok(self.source.as_ref())
332    }
333
334    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
335        Ok(self
336            .line_starts
337            .binary_search(&byte_index)
338            .unwrap_or_else(|next_line| next_line - 1))
339    }
340
341    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
342        let line_start = self.line_start(line_index)?;
343        let next_line_start = self.line_start(line_index + 1)?;
344
345        Ok(line_start..next_line_start)
346    }
347}
348
349/// A file database that can store multiple source files.
350///
351/// This is useful for simple language tests, but it might be worth creating a
352/// custom implementation when a language scales beyond a certain size.
353/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
354#[derive(Debug, Clone)]
355pub struct SimpleFiles<Name, Source> {
356    files: Vec<SimpleFile<Name, Source>>,
357}
358
359impl<Name, Source> SimpleFiles<Name, Source>
360where
361    Name: std::fmt::Display,
362    Source: AsRef<str>,
363{
364    /// Create a new files database.
365    pub fn new() -> SimpleFiles<Name, Source> {
366        SimpleFiles { files: Vec::new() }
367    }
368
369    /// Add a file to the database, returning the handle that can be used to
370    /// refer to it again.
371    pub fn add(&mut self, name: Name, source: Source) -> usize {
372        let file_id = self.files.len();
373        self.files.push(SimpleFile::new(name, source));
374        file_id
375    }
376
377    /// Get the file corresponding to the given id.
378    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
379        self.files.get(file_id).ok_or(Error::FileMissing)
380    }
381}
382
383impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
384where
385    Name: 'a + std::fmt::Display + Clone,
386    Source: 'a + AsRef<str>,
387{
388    type FileId = usize;
389    type Name = Name;
390    type Source = &'a str;
391
392    fn name(&self, file_id: usize) -> Result<Name, Error> {
393        Ok(self.get(file_id)?.name().clone())
394    }
395
396    fn source(&self, file_id: usize) -> Result<&str, Error> {
397        Ok(self.get(file_id)?.source().as_ref())
398    }
399
400    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
401        self.get(file_id)?.line_index((), byte_index)
402    }
403
404    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
405        self.get(file_id)?.line_range((), line_index)
406    }
407}
408
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines, mixing `\n` and `\r\n` endings, with no trailing newline.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The line starts computed by `SimpleFile::new` point just past each `\n`.
    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let expected = [
            0,  // "foo\n"
            4,  // "bar\r\n"
            9,  // ""
            10, // "baz"
        ];

        assert_eq!(file.line_starts, expected);
    }

    /// Slicing the source by each line range yields the lines with their endings.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let range = file.line_range((), line).unwrap();
            line_sources.push(&file.source[range]);
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}