wright/
parser.rs

1//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
2//!
3//! [AST]: crate::ast
4//! [Token]: crate::lexer::token::Token
5
6use error::{ParserError, ParserErrorKind};
7
8use super::lexer::Lexer;
9use crate::{
10    lexer::token::{Token, TokenTy},
11    source_tracking::fragment::Fragment,
12};
13use std::collections::VecDeque;
14
15mod decl;
16pub mod error;
17mod identifier;
18mod literal;
19mod path;
20mod ty;
21
22/// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
23#[derive(Debug)]
24pub struct Parser {
25    lexer: Lexer,
26    lookahead: VecDeque<Token>,
27}
28
29impl Parser {
30    /// Construct a new parser around a given [Lexer].
31    pub fn new(lexer: Lexer) -> Self {
32        Parser {
33            lexer,
34            lookahead: VecDeque::new(),
35        }
36    }
37
38    /// Get the [Lexer] that's wrapped.
39    pub fn lexer(&self) -> &Lexer {
40        &self.lexer
41    }
42
43    /// Lookahead `k` [Token]s.
44    ///
45    /// If `k == 0` then this is effectively peeking at the next [Token] from the wrapped [Lexer].
46    pub fn lookahead(&mut self, k: usize) -> Option<&Token> {
47        while self.lookahead.len() <= k {
48            self.lookahead.push_back(self.lexer.next_token()?);
49        }
50
51        // SAFETY: It's certain that if this function reaches this line, this access is infallible.
52        Some(unsafe { self.lookahead.get(k).unwrap_unchecked() })
53    }
54
55    /// Peek at the next token from the [Lexer] (cached in the lookahead queue if peeked before).
56    pub fn peek(&mut self) -> Option<&Token> {
57        self.lookahead(0)
58    }
59
60    /// Peek the [Fragment] of the next [Token].
61    pub fn peek_fragment(&mut self) -> Option<&Fragment> {
62        self.peek().map(|token| &token.fragment)
63    }
64
65    /// Peek the [TokenTy] of the next [Token].
66    pub fn peek_variant(&mut self) -> Option<TokenTy> {
67        self.peek().map(|token| token.variant)
68    }
69
70    /// Similar to [Parser::lookahead] but instead returns a slice of `n` [Token]s, starting with the next [Token].
71    ///
72    /// Returns [None] if `n` is greater than the number of remaining [Token]s for this [Parser].
73    pub fn lookahead_window(&mut self, n: usize) -> Option<&[Token]> {
74        // Pull tokens from lexer
75        while self.lookahead.len() < n {
76            self.lookahead.push_back(self.lexer.next_token()?);
77        }
78
79        // Use make contiguous here to get a unified/single slice.
80        Some(&self.lookahead.make_contiguous()[..n])
81    }
82
83    /// Peek the next token that's not whitespace.
84    pub fn peek_next_not_whitespace(&mut self) -> Option<&Token> {
85        // There's no way to do this in safe rust, despite the memory accesses being fine,
86        // so we do it unsafely here
87
88        for i in 0.. {
89            let peek = self.lookahead(i)?;
90
91            if peek.variant != TokenTy::Whitespace {
92                // This bit prevents the rust compiler from thinking we're breaking
93                // lifetime/aliasing rules by mutating the internal state in the next
94                // iteration of the loop while still holding a reference to the peeked token.
95                unsafe {
96                    let const_ref = &raw const *peek;
97                    let upcast = &*const_ref;
98                    return Some(upcast);
99                }
100            }
101        }
102
103        // Safety: For large enough values of `i`, self.lookahead eventually has to return `None`
104        unsafe { std::hint::unreachable_unchecked() }
105    }
106
107    /// Get the number of remaining bytes on this parser. This is potentially useful for checking
108    /// if a parser has advanced between two calls (or checking if a parser has reached end of input).
109    pub fn bytes_remaining(&self) -> usize {
110        let bytes_remaining_in_lookahead_buffer = self
111            .lookahead
112            .iter()
113            .map(|t| t.fragment.len())
114            .sum::<usize>();
115
116        let bytes_remaining_in_lexer = self.lexer.bytes_remaining();
117
118        bytes_remaining_in_lexer + bytes_remaining_in_lookahead_buffer
119    }
120
121    /// Get the next [Token] from this [Parser]. This may be a token that's already been peeked.
122    ///
123    /// Skips any non-document comments encountered via the lexer implementation.
124    ///
125    /// Return an error if a [Token] with [TokenTy::Unknown] is encountered.
126    pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
127        let token = self
128            .lookahead
129            .pop_front()
130            .or_else(|| self.lexer.next_token());
131
132        // Check for unknown tokens, which should always convert to an error.
133        if let Some(ref t) = token
134            && t.variant == TokenTy::Unknown
135        {
136            // Clone here is avoidable but this code path is super unlikely and
137            // probably optimized heavily.
138            Err(ParserErrorKind::EncounteredUnknownToken.at(t.fragment.clone()))
139        } else {
140            Ok(token)
141        }
142    }
143
144    /// Advance this [Parser] by `n` [Token]s. If this [Parser] runs out of [Token]s, panic.
145    ///
146    /// Panics
147    /// - If `n` is greater than the number of remaining tokens.
148    pub fn advance(&mut self, n: usize) {
149        // Add tokens to the lookahead buffer until we have enough to split off.
150        while self.lookahead.len() < n {
151            let token = self
152                .lexer
153                .next_token()
154                .expect("advance: `n` <= number of remaining tokens");
155
156            self.lookahead.push_back(token);
157        }
158
159        // Split them off.
160        self.lookahead = self.lookahead.split_off(n);
161    }
162
163    /// Peek the [Fragment] of the next [Token] and clone it or return a clone of the
164    /// remainder [Fragment] of the internal [Lexer]
165    /// (which will be empty, since there wasn't a [Token] to peek).
166    ///
167    /// This is likely only useful for error reporting -- a clone of a potentially empty fragment is
168    /// rarely ever useful otherwise.
169    pub fn peek_fragment_or_rest_cloned(&mut self) -> Fragment {
170        match self.peek() {
171            Some(Token { fragment, .. }) => fragment.clone(),
172            None => {
173                let rest = self.lexer.remaining.clone();
174
175                // Assert that we're making the right assumptions about the remaining fragment.
176                // These are (unidiomatically) done using debug_assert -- perhaps that changes eventually
177                // however it should be fine for now, since this can only produce logic bugs (never memory or
178                // concurrency bugs).
179                debug_assert!(rest.is_valid());
180                debug_assert!(rest.is_empty());
181                debug_assert!(rest.is_empty_at_end_of_source());
182
183                rest
184            }
185        }
186    }
187
188    /// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`.
189    pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> {
190        // Peeking successfully first means that the lookahead vec will never be empty here.
191        (self.peek_variant()? == token_ty)
192            // SAFETY: We just peeked a token to check its variant so this unwrap is always ok.
193            .then(|| unsafe { self.lookahead.pop_front().unwrap_unchecked() })
194    }
195
196    /// Peek at the next [Token]s of this [Parser] and determine if the [Token::variant]s match this
197    /// sequence of [TokenTy]s.
198    pub fn matches(&mut self, seq: &[TokenTy]) -> bool {
199        // Use the rare let-else to ensure there are at minimum, the given number of tokens remaining.
200        let Some(lookahead_window) = self.lookahead_window(seq.len()) else {
201            return false;
202        };
203
204        // Use a zipped iterator to compare all the token variants.
205        lookahead_window
206            .iter()
207            .zip(seq)
208            .all(|(token, matches)| token.variant == *matches)
209    }
210
211    /// Consume & remove all whitespace tokens from the front of the parser.
212    pub fn consume_optional_whitespace(&mut self) {
213        // Iterate until the next token is not whitespace.
214        while self.next_if_is(TokenTy::Whitespace).is_some() {}
215    }
216
217    /// Require a whitespace from the [Parser]. Do not advance if the next [Token] is not a whitespace.
218    pub fn consume_at_least_one_whitespace(&mut self) -> Result<(), ParserError> {
219        if self.next_if_is(TokenTy::Whitespace).is_some() {
220            self.consume_optional_whitespace();
221            Ok(())
222        } else {
223            Err(ParserErrorKind::ExpectedWhitespace.at(self.peek_fragment_or_rest_cloned()))
224        }
225    }
226}